Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Entities;
import java.util.ArrayList;
import java.util.List;
/**
* Reads the input stream into tokens.
*/
class Tokeniser {
static final char replacementChar = '\uFFFD'; // replaces null character
private CharacterReader reader; // html input
private ParseErrorList errors; // errors found while tokenising
private TokeniserState state = TokeniserState.Data; // current tokenisation state
private Token emitPending; // the token we are about to emit on next read
private boolean isEmitPending = false;
private StringBuilder charBuffer = new StringBuilder(); // buffers characters to output as one token
StringBuilder dataBuffer; // buffers data looking for </script>
Token.Tag tagPending; // tag we are building up
Token.Doctype doctypePending; // doctype building up
Token.Comment commentPending; // comment building up
private Token.StartTag lastStartTag; // the last start tag emitted, to test appropriate end tag
private boolean selfClosingFlagAcknowledged = true;
/**
 * Creates a tokeniser over the given input.
 * @param reader the character source to tokenise
 * @param errors list that collects errors found while tokenising
 */
Tokeniser(CharacterReader reader, ParseErrorList errors) {
    this.errors = errors;
    this.reader = reader;
}
/**
 * Drives the current state until a token is pending, then returns the next token.
 * Buffered characters are returned first, as a single Character token; in that case
 * the pending token stays in place and is handed out by the following call.
 * @return the next token
 */
Token read() {
    if (!selfClosingFlagAcknowledged) {
        error("Self closing flag not acknowledged");
        selfClosingFlagAcknowledged = true;
    }

    while (!isEmitPending) {
        state.read(this, reader);
    }

    // a non-character token is waiting: with nothing buffered, hand it out now
    if (charBuffer.length() == 0) {
        isEmitPending = false;
        return emitPending;
    }

    // otherwise flush the buffered characters and keep the token for the next read
    String buffered = charBuffer.toString();
    charBuffer.setLength(0);
    return new Token.Character(buffered);
}
void emit(Token token) {
Validate.isFalse(isEmitPending, "There is an unread token pending!");
emitPending = token;
isEmitPending = true;
if (token.type == Token.TokenType.StartTag) {
Token.StartTag startTag = (Token.StartTag) token;
lastStartTag = startTag;
if (startTag.selfClosing
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>)
selfClosingFlagAcknowledged = false;
} else if (token.type == Token.TokenType.EndTag) {
Token.EndTag endTag = (Token.EndTag) token;
if (endTag.attributes != null)
error("Attributes incorrectly present on end tag");
}
}
/**
 * Appends a string to the character buffer so that a run of character data
 * (character references etc.) is emitted as one token.
 * @param str characters to buffer
 */
void emit(String str) {
    // isEmitPending is intentionally left alone here; read() inspects the buffer itself
    this.charBuffer.append(str);
}
/**
 * Appends a single character to the character buffer.
 * @param c character to buffer
 */
void emit(char c) {
    this.charBuffer.append(c);
}
/**
 * @return the current tokenisation state
 */
TokeniserState getState() {
    return this.state;
}
/**
 * Switches the tokeniser to the given state.
 * @param state the new tokenisation state
 */
void transition(TokeniserState state) {
this.state = state;
}
/**
 * Advances the reader, then switches to the given state.
 * @param state the new tokenisation state
 */
void advanceTransition(TokeniserState state) {
    reader.advance();
    transition(state);
}
/**
 * Marks the self-closing flag as acknowledged, so the next read() does not
 * report "Self closing flag not acknowledged".
 */
void acknowledgeSelfClosingFlag() {
    this.selfClosingFlagAcknowledged = true;
}
Character consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
if (reader.isEmpty())
return null;
if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
return null;
if (reader.matchesAny('\t', '\n', '\r', '\f', ' ', '<', '&'))
return null;
reader.mark();
if (reader.matchConsume("#")) { // numbered
boolean isHexMode = reader.matchConsumeIgnoreCase("X");
String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence();
if (numRef.length() == 0) { // didn't match anything
characterReferenceError("numeric reference with no numerals");
reader.rewindToMark();
return null;
}
if (!reader.matchConsume(";"))
/**
 * Starts building a new tag and stores it as the pending tag.
 * @param start true to create a start tag, false to create an end tag
 * @return the newly created pending tag
 */
Token.Tag createTagPending(boolean start) {
    if (start) {
        tagPending = new Token.StartTag();
    } else {
        tagPending = new Token.EndTag();
    }
    return tagPending;
}
/**
 * Finalises the pending tag and emits it.
 */
void emitTagPending() {
    final Token.Tag pending = tagPending;
    pending.finaliseTag();
    emit(pending);
}
/**
 * Starts building a new comment token and stores it as the pending comment.
 */
void createCommentPending() {
    this.commentPending = new Token.Comment();
}
/**
 * Emits the pending comment token.
 */
void emitCommentPending() {
    emit(this.commentPending);
}
void createDoctypePending() {
doctypePending = new Token.
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>Doctype();
}
/**
 * Emits the pending doctype token.
 */
void emitDoctypePending() {
    emit(this.doctypePending);
}
/**
 * Replaces the data buffer with a new, empty StringBuilder.
 */
void createTempBuffer() {
    this.dataBuffer = new StringBuilder();
}
/**
 * Tests whether the pending tag has the same name as the last emitted start tag.
 * @return false if no start tag has been emitted yet; otherwise whether the names are equal
 */
boolean isAppropriateEndTagToken() {
    return lastStartTag != null && tagPending.tagName.equals(lastStartTag.tagName);
}
/**
 * @return the tag name of the last emitted start tag
 */
String appropriateEndTagName() {
    // NOTE(review): unlike isAppropriateEndTagToken(), this dereferences lastStartTag
    // without a null check - confirm callers only reach it after a start tag was emitted.
    final Token.StartTag last = lastStartTag;
    return last.tagName;
}
/**
 * Records an "unexpected character" parse error at the reader's current position,
 * provided the error list can accept another error.
 * @param state the tokeniser state named in the error message
 */
void error(TokeniserState state) {
    if (!errors.canAddError()) {
        return;
    }
    errors.add(new ParseError(reader.pos(), "Unexpected character '%s' in input state [%s]", reader.current(), state));
}
/**
 * Records an "unexpected end of file" parse error at the reader's current position,
 * provided the error list can accept another error.
 * @param state the tokeniser state named in the error message
 */
void eofError(TokeniserState state) {
    if (!errors.canAddError()) {
        return;
    }
    errors.add(new ParseError(reader.pos(), "Unexpectedly reached end of file (EOF) in input state [%s]", state));
}
private void characterReferenceError(String message) {
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> public enum EscapeMode {
/** Restricted entities suitable for XHTML output: lt, gt, amp, apos, and quot only. */
xhtml(xhtmlByVal),
/** Default HTML output entities. */
base(baseByVal),
/** Complete HTML entities. */
extended(fullByVal);
private Map<Character, String> map;
EscapeMode(Map<Character, String> map) {
this.map = map;
}
public Map<Character, String> getMap() {
return map;
}
}
private static final Map<String, Character> full;
private static final Map<Character, String> xhtmlByVal;
private static final Map<String, Character> base;
private static final Map<Character, String> baseByVal;
StringBuilder accum = new StringBuilder(string.length() * 2);
Map<Character, String> map = escapeMode.getMap();
for (int pos = 0; pos < string.length(); pos++) {
Character c = string.charAt(pos);
if (map.containsKey(c))
baseByVal = toCharacterKey(base);
full = loadEntities("entities-full.properties"); // extended and overblown.
fullByVal = toCharacterKey(full);
for (Object[] entity : xhtmlArray) {
Character c = Character.valueOf((char) ((Integer) entity[1]).intValue());
xhtmlByVal.put(c, ((String) entity[0]));
}
}
/**
 * Loads an entity-name to character map from a properties resource resolved relative to
 * the Entities class. Each property key is an entity name; each value is the character's
 * code written in hexadecimal.
 *
 * @param filename name of the properties resource
 * @return map of entity name to character
 * @throws MissingResourceException if the resource cannot be found or cannot be read
 */
private static Map<String, Character> loadEntities(String filename) {
    Properties properties = new Properties();
    Map<String, Character> entities = new HashMap<String, Character>();

    InputStream in = Entities.class.getResourceAsStream(filename);
    if (in == null) {
        // getResourceAsStream signals a missing resource with null; fail with a clear error instead of an NPE
        throw new MissingResourceException("Error loading entities resource: not found", "Entities", filename);
    }
    try {
        try {
            properties.load(in);
        } finally {
            // always release the stream, including when load() fails part-way
            in.close();
        }
    } catch (IOException e) {
        MissingResourceException missing =
            new MissingResourceException("Error loading entities resource: " + e.getMessage(), "Entities", filename);
        missing.initCause(e); // keep the underlying I/O failure for diagnosis
        throw missing;
    }

    for (Map.Entry<Object, Object> entry : properties.entrySet()) {
        // the value is parsed as a hex int and narrowed to char; codes above 0xFFFF would be truncated
        Character val = Character.valueOf((char) Integer.parseInt((String) entry.getValue(), 16));
        String name = (String) entry.getKey();
        entities.put(name, val);
    }
    return entities;
}
private static Map<Character, String> toCharacterKey(Map<String, Character
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import java.util.List;
/**
* Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use one of the more convenient parse methods
* in {@link org.jsoup.Jsoup}.
*/
public class Parser {
private static final int DEFAULT_MAX_ERRORS = 0; // by default, error tracking is disabled.
private TreeBuilder treeBuilder;
private int maxErrors = DEFAULT_MAX_ERRORS;
private ParseErrorList errors;
/**
 * Creates a new Parser that uses the specified TreeBuilder.
 * @param treeBuilder TreeBuilder to use to parse input into Documents.
 */
public Parser(TreeBuilder treeBuilder) {
this.treeBuilder = treeBuilder;
}
/**
 * Parses the input with this parser's TreeBuilder. The error list is re-created on every
 * call: a tracking list capped at maxErrors when error tracking is enabled, otherwise a
 * non-tracking list.
 * @param html input to parse
 * @param baseUri base URI handed to the TreeBuilder
 * @return the parsed Document
 */
public Document parseInput(String html, String baseUri) {
    if (isTrackErrors()) {
        errors = ParseErrorList.tracking(maxErrors);
    } else {
        errors = ParseErrorList.noTracking();
    }
    return treeBuilder.parse(html, baseUri, errors);
}
// gets & sets
/**
 * Returns the TreeBuilder this parser currently uses.
 * @return current TreeBuilder.
 */
public TreeBuilder getTreeBuilder() {
    return this.treeBuilder;
}
/**
 * Replaces the TreeBuilder used when parsing content.
 * @param treeBuilder the TreeBuilder to use from now on
 * @return this, for chaining
 */
public Parser setTreeBuilder(TreeBuilder treeBuilder) {
this.treeBuilder = treeBuilder;
return this;
}
/**
 * Reports whether parse error tracking is enabled, i.e. whether the maximum
 * number of tracked errors is greater than zero.
 * @return current track error state.
 */
public boolean isTrackErrors() {
    return this.maxErrors > 0;
}
/**
 * Enable or disable parse error tracking for the next parse.
 * @param maxErrors the maximum number of errors to track. Set to 0 to disable
 * (tracking is on only while this value is greater than zero).
 * @return this, for chaining
 */
public Parser setTrackErrors(int maxErrors) {
this.maxErrors = maxErrors;
return this;
}
/**
* Retrieve the parse errors, if any, from the last parse.
* @return list of parse errors, up to
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> the size of the maximum errors tracked.
*/
public List<ParseError> getErrors() {
    // this field is only assigned by parseInput(), so it is still unset before the first parse
    return this.errors;
}
// static parse functions below
/**
 * Parses HTML into a Document using an HTML tree builder, without error tracking.
 *
 * @param html HTML to parse
 * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
 *
 * @return parsed Document
 */
public static Document parse(String html, String baseUri) {
    final TreeBuilder builder = new HtmlTreeBuilder();
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return builder.parse(html, baseUri, untracked);
}
/**
 * Parses an HTML fragment into a list of nodes, without error tracking. The context
 * element, if supplied, provides the parsing context.
 *
 * @param fragmentHtml the fragment of HTML to parse
 * @param context (optional) the element that this HTML fragment is being parsed for (i.e. for inner HTML). This
 * provides stack context (for implicit element creation).
 * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
 *
 * @return list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.
 */
public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri) {
    final HtmlTreeBuilder builder = new HtmlTreeBuilder();
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return builder.parseFragment(fragmentHtml, context, baseUri, untracked);
}
/**
* Parse a fragment of HTML into the {@code body} of a Document.
*
* @param bodyHtml fragment of HTML
* @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
*
* Create a new HTML parser. This parser treats input as HTML5, and enforces the creation of a normalised document,
* based on a knowledge of the semantics of the incoming tags.
* @return a new HTML parser.
*/
public static Parser htmlParser() {
    // an HTML parser is simply a Parser backed by an HtmlTreeBuilder
    final TreeBuilder htmlBuilder = new HtmlTreeBuilder();
    return new Parser(htmlBuilder);
}
/**
* Create a new XML parser. This parser assumes no knowledge of the incoming tags and does not treat it as HTML,
* rather creates a simple tree directly from the input.
* @return a new simple XML parser.
*/
public static Parser
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> xmlParser() {
return new Parser(new XmlTreeBuilder());
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.helper.Validate;
import java.util.Locale;
/**
CharacterReader consumes tokens off a string. To replace the old TokenQueue.
*/
class CharacterReader {
// Sentinel value returned by current() and consume() when the reader is empty.
static final char EOF = (char) -1;
// Characters of the source string, copied into an array by the constructor.
private final char[] input;
// Number of characters in input (set from input.length in the constructor).
private final int length;
// Index into input of the next character to be read.
private int pos = 0;
// Position remembered by mark() and restored by rewindToMark().
private int mark = 0;
CharacterReader(String input) {
Validate.notNull(input);
this.input = input.toCharArray();
this.length = this.input.length;
}
/**
 * Gets the current read position.
 * @return index of the next character to be read
 */
int pos() {
    return this.pos;
}
/**
 * Tests if the read position has reached (or passed) the end of the input.
 * @return true if there is nothing left to read
 */
boolean isEmpty() {
    return !(pos < length);
}
/**
 * Peeks at the character under the read position without advancing.
 * @return the current character, or EOF if the reader is empty
 */
char current() {
    if (isEmpty()) {
        return EOF;
    }
    return input[pos];
}
/**
 * Reads the current character and moves the position forward by one.
 * The position is advanced even when the reader is already empty.
 * @return the character that was read, or EOF if the reader was empty
 */
char consume() {
    final char taken;
    if (isEmpty()) {
        taken = EOF;
    } else {
        taken = input[pos];
    }
    pos++;
    return taken;
}
/**
 * Steps the read position back by one character. No bounds check is applied.
 */
void unconsume() {
    pos -= 1;
}
/**
 * Moves the read position forward by one character. No bounds check is applied.
 */
void advance() {
    pos += 1;
}
/**
 * Remembers the current read position so that rewindToMark() can return to it.
 */
void mark() {
    this.mark = this.pos;
}
/**
 * Resets the read position to the one last remembered by mark().
 */
void rewindToMark() {
    this.pos = this.mark;
}
/**
 * Reads the current character as a one-character String and advances the position.
 * @return a new String containing the single consumed character
 */
String consumeAsString() {
    final int at = pos;
    pos = at + 1; // advance before building the string, as the post-increment did
    return new String(input, at, 1);
}
/**
 * Finds how far ahead of the current position the next occurrence of a character is.
 * @param c the character to look for
 * @return distance from the current position to the next occurrence of c; -1 if c does not occur
 */
int nextIndexOf(char c) {
    // doesn't handle scanning for surrogates
    int cursor = pos;
    while (cursor < length) {
        if (input[cursor] == c) {
            return cursor - pos;
        }
        cursor++;
    }
    return -1;
}
/**
* Returns the number of characters between the current position and the next instance of the input sequence
*
* @param seq scan target
* @return offset between current position and next instance of target. -1 if not found.
*/
int nextIndexOf(CharSequence seq) {
// doesn't handle scanning for surrogates
char startChar = seq.charAt(0);
for (int offset = pos; offset < length; offset++) {
// scan to first instance of startchar:
if (startChar != input[offset])
while(++offset < length && startChar != input[offset]);
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> if (offset < length) {
int i = offset + 1;
int last = i + seq.length()-1;
for (int j = 1; i < last && seq.charAt(j) == input[i]; i++, j++);
if (i == last) // found full sequence
return offset - pos;
}
}
return -1;
}
/**
 * Reads everything up to, but not including, the next occurrence of a character.
 * If the character does not occur, the rest of the input is consumed.
 * @param c the character to stop before
 * @return the consumed text (may be empty)
 */
String consumeTo(char c) {
    final int distance = nextIndexOf(c);
    if (distance == -1) {
        return consumeToEnd();
    }
    final String head = new String(input, pos, distance);
    pos += distance;
    return head;
}
/**
 * Reads everything up to, but not including, the next occurrence of a sequence.
 * If the sequence does not occur, the rest of the input is consumed.
 * @param seq the sequence to stop before
 * @return the consumed text (may be empty)
 */
String consumeTo(String seq) {
    final int distance = nextIndexOf(seq);
    if (distance == -1) {
        return consumeToEnd();
    }
    final String head = new String(input, pos, distance);
    pos += distance;
    return head;
}
/**
 * Reads characters until one of the supplied stop characters (or the end of input) is reached.
 * The stop character itself is not consumed.
 * @param chars the characters to stop before
 * @return the consumed text, or an empty string if nothing was consumed
 */
String consumeToAny(final char... chars) {
    final int begin = pos;
    boolean hit = false;
    while (!hit && pos < length) {
        final char here = input[pos];
        for (char stop : chars) {
            if (here == stop) {
                hit = true;
                break;
            }
        }
        if (!hit) {
            pos++;
        }
    }
    if (pos > begin) {
        return new String(input, begin, pos - begin);
    }
    return "";
}
/**
 * Reads all remaining input and leaves the position at the end.
 * @return the text from the current position to the end of the input
 */
String consumeToEnd() {
    final int remaining = length - pos;
    final String rest = new String(input, pos, remaining);
    pos = length;
    return rest;
}
/**
 * Reads a run of ASCII letters (A-Z, a-z) starting at the current position.
 * @return the consumed letters; empty if the current character is not a letter
 */
String consumeLetterSequence() {
    final int begin = pos;
    for (; pos < length; pos++) {
        final char c = input[pos];
        final boolean upper = c >= 'A' && c <= 'Z';
        final boolean lower = c >= 'a' && c <= 'z';
        if (!upper && !lower) {
            break;
        }
    }
    return new String(input, begin, pos - begin);
}
String consumeLetterThenDigitSequence() {
int start = pos;
while (pos < length) {
char c = input[pos];
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
pos++;
else
break;
}
while (!isEmpty()) {
char c = input[pos];
if (c >= '0' && c <= '9')
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
pos++;
else
break;
}
return new String(input, start, pos - start);
}
/**
 * Reads a run of hexadecimal digits (0-9, A-F, a-f) starting at the current position.
 * @return the consumed digits; empty if the current character is not a hex digit
 */
String consumeHexSequence() {
    final int begin = pos;
    for (; pos < length; pos++) {
        final char c = input[pos];
        final boolean decimal = c >= '0' && c <= '9';
        final boolean upperHex = c >= 'A' && c <= 'F';
        final boolean lowerHex = c >= 'a' && c <= 'f';
        if (!decimal && !upperHex && !lowerHex) {
            break;
        }
    }
    return new String(input, begin, pos - begin);
}
/**
 * Reads a run of decimal digits (0-9) starting at the current position.
 * @return the consumed digits; empty if the current character is not a digit
 */
String consumeDigitSequence() {
    final int begin = pos;
    for (; pos < length; pos++) {
        final char c = input[pos];
        if (c < '0' || c > '9') {
            break;
        }
    }
    return new String(input, begin, pos - begin);
}
/**
 * Tests if the current character equals the supplied one, without consuming it.
 * @param c character to compare against
 * @return true if the reader is not empty and the current character is c
 */
boolean matches(char c) {
    if (isEmpty()) {
        return false;
    }
    return input[pos] == c;
}
/**
 * Tests if the input at the current position starts with the supplied sequence (case sensitive).
 * Nothing is consumed.
 * @param seq sequence to compare against
 * @return true if the upcoming characters equal seq
 */
boolean matches(String seq) {
    final int needed = seq.length();
    if (length - pos < needed) {
        return false; // not enough input left to hold the sequence
    }
    int i = 0;
    while (i < needed) {
        if (seq.charAt(i) != input[pos + i]) {
            return false;
        }
        i++;
    }
    return true;
}
/**
 * Tests if the input at the current position starts with the supplied sequence, comparing
 * each character pair after Character.toUpperCase. Nothing is consumed.
 * @param seq sequence to compare against
 * @return true if the upcoming characters equal seq, ignoring case
 */
boolean matchesIgnoreCase(String seq) {
    final int needed = seq.length();
    if (length - pos < needed) {
        return false; // not enough input left to hold the sequence
    }
    int i = 0;
    while (i < needed) {
        final char wanted = Character.toUpperCase(seq.charAt(i));
        final char actual = Character.toUpperCase(input[pos + i]);
        if (wanted != actual) {
            return false;
        }
        i++;
    }
    return true;
}
/**
 * Tests if the current character is one of the supplied characters, without consuming it.
 * @param seq candidate characters
 * @return true if the reader is not empty and the current character is in seq
 */
boolean matchesAny(char... seq) {
    if (isEmpty()) {
        return false;
    }
    final char here = input[pos];
    for (int i = 0; i < seq.length; i++) {
        if (seq[i] == here) {
            return true;
        }
    }
    return false;
}
/**
 * Tests if the current character is an ASCII letter (A-Z or a-z), without consuming it.
 * @return true if the reader is not empty and the current character is a letter
 */
boolean matchesLetter() {
    if (isEmpty()) {
        return false;
    }
    final char here = input[pos];
    final boolean upper = here >= 'A' && here <= 'Z';
    final boolean lower = here >= 'a' && here <= 'z';
    return upper || lower;
}
boolean matchesDigit() {
if (isEmpty())
return false;
char c =
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> input[pos];
return (c >= '0' && c <= '9');
}
/**
 * Consumes the supplied sequence if the input at the current position starts with it (case sensitive).
 * @param seq sequence to match and consume
 * @return true if seq matched and was consumed; false (position unchanged) otherwise
 */
boolean matchConsume(String seq) {
    if (!matches(seq)) {
        return false;
    }
    pos += seq.length();
    return true;
}
/**
 * Consumes the supplied sequence if the input at the current position starts with it, ignoring case.
 * @param seq sequence to match and consume
 * @return true if seq matched and was consumed; false (position unchanged) otherwise
 */
boolean matchConsumeIgnoreCase(String seq) {
    if (!matchesIgnoreCase(seq)) {
        return false;
    }
    pos += seq.length();
    return true;
}
/**
 * Checks whether the remaining input contains the sequence in all-lowercase or all-uppercase form
 * (Locale.ENGLISH casing). Mixed-case occurrences are not detected.
 * @param seq sequence to look for
 * @return true if the lowercased or uppercased sequence occurs at or after the current position
 */
boolean containsIgnoreCase(String seq) {
    // used to check presence of </title>, </style>. only finds consistent case.
    final String lower = seq.toLowerCase(Locale.ENGLISH);
    final String upper = seq.toUpperCase(Locale.ENGLISH);
    if (nextIndexOf(lower) > -1) {
        return true;
    }
    return nextIndexOf(upper) > -1;
}
/**
 * Returns the unread remainder of the input, without consuming it.
 */
@Override
public String toString() {
    final int remaining = length - pos;
    return new String(input, pos, remaining);
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.select;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
/**
 * Gathers the elements of a tree that satisfy a given evaluator.
 *
 * @author Jonathan Hedley
 */
public class Collector {

    // static utility holder; not instantiable
    private Collector() {
    }

    /**
     Walk root and all of its descendants, keeping every element accepted by the evaluator.
     @param eval Evaluator each element is tested against
     @param root root of the tree to walk
     @return the matching elements; empty if none matched
     */
    public static Elements collect (Evaluator eval, Element root) {
        Elements matched = new Elements();
        NodeVisitor visitor = new Accumulator(root, matched, eval);
        NodeTraversor traversor = new NodeTraversor(visitor);
        traversor.traverse(root);
        return matched;
    }

    /**
     Visitor that adds each element accepted by the evaluator to the supplied list.
     */
    private static class Accumulator implements NodeVisitor {
        private final Element root;
        private final Elements elements;
        private final Evaluator eval;

        Accumulator(Element root, Elements elements, Evaluator eval) {
            this.root = root;
            this.elements = elements;
            this.eval = eval;
        }

        public void head(Node node, int depth) {
            // only Element nodes are candidates
            if (!(node instanceof Element)) {
                return;
            }
            Element candidate = (Element) node;
            if (eval.matches(root, candidate)) {
                elements.add(candidate);
            }
        }

        public void tail(Node node, int depth) {
            // void
        }
    }
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.nodes;
/**
 A node holding raw data, such as the contents of style or script tags, where the contents should not show in text().
 @author Jonathan Hedley, jonathan@hedley.net */
public class DataNode extends Node{
    // attribute key under which the node's data is stored
    private static final String DATA_KEY = "data";

    /**
     Create a new DataNode.
     @param data data contents
     @param baseUri base URI
     */
    public DataNode(String data, String baseUri) {
        super(baseUri);
        attributes.put(DATA_KEY, data);
    }

    public String nodeName() {
        return "#data";
    }

    /**
     Get the data contents of this node, exactly as stored.
     @return data
     */
    public String getWholeData() {
        return attributes.get(DATA_KEY);
    }

    /**
     * Replace the data contents of this node.
     * @param data unencoded data
     * @return this node, for chaining
     */
    public DataNode setWholeData(String data) {
        attributes.put(DATA_KEY, data);
        return this;
    }

    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
        // data nodes are written out without escaping, so " in script, style stays plain
        final String raw = getWholeData();
        accum.append(raw);
    }

    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}

    public String toString() {
        return outerHtml();
    }

    /**
     Create a new DataNode from HTML encoded data; the data is passed through Entities.unescape first.
     @param encodedData encoded data
     @param baseUri base URI
     @return new DataNode
     */
    public static DataNode createFromEncoded(String encodedData, String baseUri) {
        return new DataNode(Entities.unescape(encodedData), baseUri);
    }
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> el = tb.insertEmpty(start);
// jsoup special: update base the frist time it is seen
if (name.equals("base") && el.hasAttr("href"))
tb.maybeSetBaseUri(el);
} else if (name.equals("meta")) {
Element meta = tb.insertEmpty(start);
// todo: charset switches
} else if (name.equals("title")) {
handleRcData(start, tb);
} else if (StringUtil.in(name, "noframes", "style")) {
handleRawtext(start, tb);
} else if (name.equals("noscript")) {
// else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
tb.insert(start);
tb.transition(InHeadNoscript);
} else if (name.equals("script")) {
// skips some script rules as won't execute them
tb.insert(start);
tb.tokeniser.transition(TokeniserState.ScriptData);
tb.markInsertionMode();
tb.transition(Text);
} else if (name.equals("head")) {
tb.error(this);
return false;
} else {
return anythingElse(t, tb);
}
break;
case EndTag:
Token.EndTag end = t.asEndTag();
name = end.name();
if (name.equals("head")) {
tb.pop();
tb.transition(AfterHead);
} else if (StringUtil.in(name, "body", "html", "br")) {
return anythingElse(t, tb);
} else {
tb.error(this);
return false;
}
break;
default:
return anythingElse(t, tb);
}
return true;
}
// Fallback handling: process a synthetic </head> end tag first, then hand the original token to the tree builder.
private boolean anythingElse(Token t, TreeBuilder tb) {
    Token.EndTag closeHead = new Token.EndTag("head");
    tb.process(closeHead);
    return tb.process(t);
}
},
InHeadNoscript {
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isDoctype()) {
tb.error(this);
} else if (t.isStartTag() && t.asStartTag().name().equals("html")) {
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>) && !StringUtil.in(el.nodeName(), "address", "div", "p"))
break;
}
if (tb.inButtonScope("p")) {
tb.process(new Token.EndTag("p"));
}
tb.insert(startTag);
} else if (name.equals("plaintext")) {
if (tb.inButtonScope("p")) {
tb.process(new Token.EndTag("p"));
}
tb.insert(startTag);
tb.tokeniser.transition(TokeniserState.PLAINTEXT); // once in, never gets out
} else if (name.equals("button")) {
if (tb.inButtonScope("button")) {
// close and reprocess
tb.error(this);
tb.process(new Token.EndTag("button"));
tb.process(startTag);
} else {
tb.reconstructFormattingElements();
tb.insert(startTag);
tb.framesetOk(false);
}
} else if (name.equals("a")) {
if (tb.getActiveFormattingElement("a") != null) {
tb.error(this);
tb.process(new Token.EndTag("a"));
// still on stack?
Element remainingA = tb.getFromStack("a");
if (remainingA != null) {
tb.removeFromActiveFormattingElements(remainingA);
tb.removeFromStack(remainingA);
}
}
tb.reconstructFormattingElements();
Element a = tb.insert(startTag);
tb.pushActiveFormattingElements(a);
} else if (StringUtil.in(name,
"b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) {
tb.reconstructFormattingElements();
Element el = tb.insert(startTag);
tb.pushActiveFormattingElements(el);
} else if (name.equals("nobr")) {
tb.reconstructFormattingElements();
if (tb.inScope("nobr")) {
tb.error(this);
tb.process(new Token.EndTag("nobr"));
tb.reconstructFormattingElements();
}
Element el = tb.insert(startTag);
tb.pushActiveFormattingElements
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>(el);
} else if (StringUtil.in(name, "applet", "marquee", "object")) {
tb.reconstructFormattingElements();
tb.insert(startTag);
tb.insertMarkerToFormattingElements();
tb.framesetOk(false);
} else if (name.equals("table")) {
if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks && tb.inButtonScope("p")) {
tb.process(new Token.EndTag("p"));
}
tb.insert(startTag);
tb.framesetOk(false);
tb.transition(InTable);
} else if (StringUtil.in(name, "area", "br", "embed", "img", "keygen", "wbr")) {
tb.reconstructFormattingElements();
tb.insertEmpty(startTag);
tb.framesetOk(false);
} else if (name.equals("input")) {
tb.reconstructFormattingElements();
Element el = tb.insertEmpty(startTag);
if (!el.attr("type").equalsIgnoreCase("hidden"))
tb.framesetOk(false);
} else if (StringUtil.in(name, "param", "source", "track")) {
tb.insertEmpty(startTag);
} else if (name.equals("hr")) {
if (tb.inButtonScope("p")) {
tb.process(new Token.EndTag("p"));
}
tb.insertEmpty(startTag);
tb.framesetOk(false);
} else if (name.equals("image")) {
// we're not supposed to ask.
startTag.name("img");
return tb.process(startTag);
} else if (name.equals("isindex")) {
// how much do we care about the early 90s?
tb.error(this);
if (tb.getFormElement() != null)
return false;
tb.tokeniser.acknowledgeSelfClosingFlag();
tb.process(new Token.StartTag("form"));
if (startTag.attributes.hasKey("action")) {
Element form = tb.getFormElement();
form.attr("action", startTag.attributes.get("action"));
}
tb.process(new Token.StartTag("
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>hr"));
tb.process(new Token.StartTag("label"));
// hope you like english.
String prompt = startTag.attributes.hasKey("prompt") ?
startTag.attributes.get("prompt") :
"This is a searchable index. Enter search keywords: ";
tb.process(new Token.Character(prompt));
// input
Attributes inputAttribs = new Attributes();
for (Attribute attr : startTag.attributes) {
if (!StringUtil.in(attr.getKey(), "name", "action", "prompt"))
inputAttribs.put(attr);
}
inputAttribs.put("name", "isindex");
tb.process(new Token.StartTag("input", inputAttribs));
tb.process(new Token.EndTag("label"));
tb.process(new Token.StartTag("hr"));
tb.process(new Token.EndTag("form"));
} else if (name.equals("textarea")) {
tb.insert(startTag);
// todo: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
tb.tokeniser.transition(TokeniserState.Rcdata);
tb.markInsertionMode();
tb.framesetOk(false);
tb.transition(Text);
} else if (name.equals("xmp")) {
if (tb.inButtonScope("p")) {
tb.process(new Token.EndTag("p"));
}
tb.reconstructFormattingElements();
tb.framesetOk(false);
handleRawtext(startTag, tb);
} else if (name.equals("iframe")) {
tb.framesetOk(false);
handleRawtext(startTag, tb);
} else if (name.equals("noembed")) {
// also handle noscript if script enabled
handleRawtext(startTag, tb);
} else if (name.equals("select")) {
tb.reconstructFormattingElements();
tb.insert(startTag);
tb.framesetOk(false);
HtmlTreeBuilderState state = tb.state();
if (state.equals(InTable) || state.equals(InCaption) || state.equals(InTable
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>Body) || state.equals(InRow) || state.equals(InCell))
tb.transition(InSelectInTable);
else
tb.transition(InSelect);
} else if (StringUtil.in("optgroup", "option")) {
if (tb.currentElement().nodeName().equals("option"))
tb.process(new Token.EndTag("option"));
tb.reconstructFormattingElements();
tb.insert(startTag);
} else if (StringUtil.in("rp", "rt")) {
if (tb.inScope("ruby")) {
tb.generateImpliedEndTags();
if (!tb.currentElement().nodeName().equals("ruby")) {
tb.error(this);
tb.popStackToBefore("ruby"); // i.e. close up to but not include name
}
tb.insert(startTag);
}
} else if (name.equals("math")) {
tb.reconstructFormattingElements();
// todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml)
tb.insert(startTag);
tb.tokeniser.acknowledgeSelfClosingFlag();
} else if (name.equals("svg")) {
tb.reconstructFormattingElements();
// todo: handle A start tag whose tag name is "svg" (xlink, svg)
tb.insert(startTag);
tb.tokeniser.acknowledgeSelfClosingFlag();
} else if (StringUtil.in(name,
"caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr")) {
tb.error(this);
return false;
} else {
tb.reconstructFormattingElements();
tb.insert(startTag);
}
break;
case EndTag:
Token.EndTag endTag = t.asEndTag();
name = endTag.name();
if (name.equals("body")) {
if (!tb.inScope("body")) {
tb.error(this);
return false;
} else {
// todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
tb.transition(AfterBody);
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>Body);
return tb.process(t);
}
return true;
}
},
AfterAfterFrameset {
    /**
     * Handles a token in this state.
     * <ul>
     * <li>Comment: passed to {@code tb.insert}; returns true.</li>
     * <li>Doctype, whitespace-only characters, or an {@code html} start tag: reprocessed with the InBody rules.</li>
     * <li>EOF: accepted with no further action; returns true.</li>
     * <li>A {@code noframes} start tag: reprocessed with the InHead rules.</li>
     * <li>Anything else: reported via {@code tb.error(this)}; returns false.</li>
     * </ul>
     * @param t the token to handle
     * @param tb the tree builder being driven
     * @return true, the result of the delegated state, or false for an unexpected token
     */
    boolean process(Token t, HtmlTreeBuilder tb) {
        if (t.isComment()) {
            tb.insert(t.asComment());
            return true;
        }
        if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().name().equals("html"))) {
            return tb.process(t, InBody);
        }
        if (t.isEOF()) {
            // nice work chuck: nothing left to do
            return true;
        }
        if (t.isStartTag() && t.asStartTag().name().equals("noframes")) {
            return tb.process(t, InHead);
        }
        // every other token is unexpected in this state
        tb.error(this);
        return false;
    }
},
ForeignContent {
// Placeholder state: every token is reported as handled (returns true) and the tree builder is not touched.
boolean process(Token t, HtmlTreeBuilder tb) {
return true;
// todo: implement. Also; how do we get here?
}
};
// One-character string holding NUL (U+0000).
// NOTE(review): not referenced in the visible part of this file; presumably used by states defined earlier - confirm.
private static String nullString = String.valueOf('\u0000');
/**
 * Handle one token according to this state's rules.
 * @param t the token to handle
 * @param tb the tree builder to act on
 * @return a boolean result; in the states visible here, false is returned after an unexpected token has been
 * reported through {@code tb.error(this)}, and true (or a delegated state's result) otherwise
 */
abstract boolean process(Token t, HtmlTreeBuilder tb);
/**
 * Tells whether a token is a character token consisting solely of whitespace, as judged by
 * {@code StringUtil.isWhitespace}. A character token with empty data also counts as whitespace.
 * @param t the token to inspect
 * @return true for an all-whitespace (or empty) character token; false for any other token
 */
private static boolean isWhitespace(Token t) {
    if (!t.isCharacter()) {
        return false;
    }
    // todo: this checks more than spec - "\t", "\n", "\f", "\r", " "
    for (char ch : t.asCharacter().getData().toCharArray()) {
        if (!StringUtil.isWhitespace(ch)) {
            return false;
        }
    }
    return true;
}
/**
 * Shared handling for a start tag whose content is read in the tokeniser's Rcdata state: the tag is inserted,
 * the tokeniser is switched to Rcdata, and the tree builder moves to the Text state.
 * @param startTag the start tag being handled
 * @param tb the tree builder to act on
 */
private static void handleRcData(Token.StartTag startTag, HtmlTreeBuilder tb) {
tb.insert(startTag);
tb.tokeniser.transition(TokeniserState.Rcdata);
// NOTE(review): presumably records the current state so it can be restored once the text ends - confirm in HtmlTreeBuilder
tb.markInsertionMode();
tb.transition(Text);
}
/**
 * Shared handling for a start tag whose content is read in the tokeniser's Rawtext state: the tag is inserted,
 * the tokeniser is switched to Rawtext, and the tree builder moves to the Text state.
 * @param startTag the start tag being handled
 * @param tb the tree builder to act on
 */
private static void handleRawtext(Token.StartTag startTag, HtmlTreeBuilder tb) {
tb.insert(startTag);
tb.tokeniser.transition(TokeniserState.Rawtext);
// NOTE(review): presumably records the current state so it can be restored once the text ends - confirm in HtmlTreeBuilder
tb.markInsertionMode();
tb.transition(Text);
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;
import org.jsoup.helper.DataUtil;
import org.jsoup.helper.HttpConnection;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
/**
The core public access point to the jsoup functionality.
@author Jonathan Hedley */
public class Jsoup {
private Jsoup() {}
/**
Parse HTML into a Document. The parser will make a sensible, balanced document tree out of any HTML.
This is a direct delegation to {@link Parser#parse(String, String)}.
@param html HTML to parse
@param baseUri The URL where the HTML was retrieved from. Used to resolve relative URLs to absolute URLs, that occur
before the HTML declares a {@code <base href>} tag.
@return the parsed Document
*/
public static Document parse(String html, String baseUri) {
return Parser.parse(html, baseUri);
}
/**
Parse HTML into a Document, using the provided Parser. You can provide an alternate parser, such as a simple XML
(non-HTML) parser. The work is done by {@code parser.parseInput(html, baseUri)}.
@param html HTML to parse
@param baseUri The URL where the HTML was retrieved from. Used to resolve relative URLs to absolute URLs, that occur
before the HTML declares a {@code <base href>} tag.
@param parser alternate {@link Parser#xmlParser() parser} to use.
@return the Document produced by the supplied parser
*/
public static Document parse(String html, String baseUri, Parser parser) {
return parser.parseInput(html, baseUri);
}
/**
Parse HTML into a Document. As no base URI is specified, absolute URL detection relies on the HTML including a
{@code <base href>} tag. The HTML is handed to {@link Parser#parse(String, String)} with an empty base URI.
@param html HTML to parse
@return the parsed Document
@see #parse(String, String)
*/
public static Document parse(String html) {
return Parser.parse(html, "");
}
/**
* Creates a new {@link Connection} to a URL. Use to fetch and parse a HTML page.
* <p>
* Use examples:
* <ul>
*
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> in input stream to read. Make sure to close it after parsing.
@param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
present, or fall back to {@code UTF-8} (which is often safe to do).
@param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
@return sane HTML
@throws IOException if the file could not be found, or read, or if the charsetName is invalid.
*/
public static Document parse(InputStream in, String charsetName, String baseUri) throws IOException {
// Straight delegation to DataUtil.load; nothing here closes the stream, which the Javadoc leaves to the caller.
return DataUtil.load(in, charsetName, baseUri);
}
/**
Read an input stream, and parse it to a Document. You can provide an alternate parser, such as a simple XML
(non-HTML) parser. The work is delegated to {@code DataUtil.load(in, charsetName, baseUri, parser)}.
@param in input stream to read. Make sure to close it after parsing.
@param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
present, or fall back to {@code UTF-8} (which is often safe to do).
@param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
@param parser alternate {@link Parser#xmlParser() parser} to use.
@return the Document loaded from the stream
@throws IOException if the file could not be found, or read, or if the charsetName is invalid.
*/
public static Document parse(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
return DataUtil.load(in, charsetName, baseUri, parser);
}
/**
Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.
This is a direct delegation to {@code Parser.parseBodyFragment(bodyHtml, baseUri)}.
@param bodyHtml body HTML fragment
@param baseUri URL to resolve relative URLs against.
@return the Document built around the fragment
@see Document#body()
*/
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
return Parser.parseBodyFragment(bodyHtml, baseUri);
}
/**
Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.
@param bodyHtml body HTML fragment
@return sane HTML document
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
@see Document#body()
*/
public static Document parseBodyFragment(String bodyHtml) {
// No base URI is available in this overload, so an empty string is passed to the parser.
return Parser.parseBodyFragment(bodyHtml, "");
}
/**
Fetch a URL and parse the response as HTML. Kept for compatibility; {@link #connect(String)} is the preferred
entry point in most cases.
<p>
The character set comes from the content-type header or an http-equiv meta tag, with {@code UTF-8} as the fallback.
@param url URL to fetch (with a GET). The protocol must be {@code http} or {@code https}.
@param timeoutMillis Connection and read timeout, in milliseconds. If exceeded, IOException is thrown.
@return The parsed HTML.
@throws IOException If the final server response != 200 OK (redirects are followed), or if there's an error reading
the response stream.
@see #connect(String)
*/
public static Document parse(URL url, int timeoutMillis) throws IOException {
    // open the connection, apply the timeout, then issue the GET
    Connection connection = HttpConnection.connect(url);
    connection.timeout(timeoutMillis);
    return connection.get();
}
/**
Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
tags and attributes.
@param bodyHtml input untrusted HTML (body fragment)
@param baseUri URL to resolve relative URLs against
@param whitelist white-list of permitted HTML elements
@return safe HTML (body fragment)
@see Cleaner#clean(Document)
*/
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    // Parse first, then build the cleaner: same evaluation order as before.
    final Document parsedInput = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(parsedInput);
    // Only the body's HTML is returned, matching the "body fragment" contract.
    return sanitized.body().html();
}
/**
Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
tags and attributes.
@param bodyHtml input untrusted HTML (body fragment)
@param whitelist white-list of permitted HTML elements
@return safe HTML (body fragment)
@see Cleaner#clean(Document)
*/
public static String clean(String bodyHtml, Whitelist whitelist) {
    // Delegates to the three-argument overload with an empty base URI.
    final String cleaned = clean(bodyHtml, "", whitelist);
    return cleaned;
}
/**
* Get safe HTML from untrusted input HTML,
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.select;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
/**
* Base combining (and, or) evaluator.
*/
abstract class CombiningEvaluator extends Evaluator {
final List<Evaluator> evaluators;
/**
 * Creates a combining evaluator whose list of child evaluators starts out empty.
 */
CombiningEvaluator() {
    // The no-arg superclass constructor is invoked implicitly.
    this.evaluators = new ArrayList<Evaluator>();
}
/**
 * Creates a combining evaluator and copies the supplied evaluators into its own list,
 * in the collection's iteration order.
 * @param evaluators evaluators to copy in
 */
CombiningEvaluator(Collection<Evaluator> evaluators) {
    this();
    for (Evaluator evaluator : evaluators) {
        this.evaluators.add(evaluator);
    }
}
/**
 * Gets the last evaluator in the list.
 * @return the last evaluator, or null when the list is empty
 */
Evaluator rightMostEvaluator() {
    final int count = evaluators.size();
    if (count > 0) {
        return evaluators.get(count - 1);
    }
    return null;
}
/**
 * Overwrites the last evaluator in the list with the given replacement.
 * No emptiness check is made here; with an empty list the index passed to set is -1.
 * @param replacement evaluator to store in the last slot
 */
void replaceRightMostEvaluator(Evaluator replacement) {
    final int lastIndex = evaluators.size() - 1;
    evaluators.set(lastIndex, replacement);
}
/**
 * Combining evaluator that matches only when every one of its child evaluators matches.
 */
static final class And extends CombiningEvaluator {
    And(Collection<Evaluator> evaluators) {
        super(evaluators);
    }

    And(Evaluator... evaluators) {
        this(Arrays.asList(evaluators));
    }

    /**
     * @return false as soon as one child evaluator rejects the node; true otherwise
     *         (including when there are no child evaluators)
     */
    @Override
    public boolean matches(Element root, Element node) {
        int index = 0;
        while (index < evaluators.size()) {
            final Evaluator current = evaluators.get(index);
            if (!current.matches(root, node)) {
                return false;
            }
            index++;
        }
        return true;
    }

    /**
     * @return the child evaluators joined with a single space
     */
    @Override
    public String toString() {
        final String separator = " ";
        return StringUtil.join(evaluators, separator);
    }
}
static final class Or extends CombiningEvaluator {
/**
* Create a new Or evaluator. The initial evaluators are ANDed together and used as the first clause of the OR.
* @param evaluators initial OR clause (these are wrapped into an AND evaluator).
*/
Or(Collection<Evaluator> evaluators) {
super();
if (evaluators.size() > 1)
this.evaluators.add(new And(evaluators));
else // 0 or 1
this.evaluators.addAll(evaluators);
}
Or() {
super();
}
/**
 * Appends an evaluator to the end of this evaluator's list.
 * @param e evaluator to append
 */
public void add(Evaluator e) {
    this.evaluators.add(e);
}
@Override
public
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> boolean matches(Element root, Element node) {
for (int i = 0; i < evaluators.size(); i++) {
Evaluator s = evaluators.get(i);
if (s.matches(root, node))
return true;
}
return false;
}
@Override
public String toString() {
    // Renders as ":or" followed by the evaluator list's own string form.
    final String pattern = ":or%s";
    return String.format(pattern, evaluators);
}
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
/**
* A character queue with parsing helpers.
*
* @author Jonathan Hedley
*/
public class TokenQueue {
private String queue;
private int pos = 0;
private static final char ESC = '\\'; // escape char for chomp balanced.
/**
Create a new TokenQueue.
@param data string of data to back queue.
*/
public TokenQueue(String data) {
    // Validate the backing data before storing it.
    Validate.notNull(data);
    this.queue = data;
}
/**
* Is the queue empty?
* @return true if no data left in queue.
*/
public boolean isEmpty() {
    // Empty exactly when no characters lie between the read position and the end of the data.
    final int unread = queue.length() - pos;
    return unread == 0;
}
/**
 * Number of characters between the current read position and the end of the backing string.
 * @return length of the backing string minus the current position
 */
private int remainingLength() {
    final int total = queue.length();
    return total - pos;
}
/**
* Retrieves but does not remove the first character from the queue.
* @return First character, or 0 if empty.
*/
public char peek() {
    if (isEmpty()) {
        return 0; // nothing left to look at
    }
    return queue.charAt(pos);
}
/**
Add a character to the start of the queue (will be the next character retrieved).
@param c character to add
*/
public void addFirst(Character c) {
    // Convert to a one-character string and reuse the String overload.
    final String asString = c.toString();
    addFirst(asString);
}
/**
Add a string to the start of the queue.
@param seq string to add.
*/
public void addFirst(String seq) {
    // not very performant, but an edge case
    final String unread = queue.substring(pos);
    // Already-consumed characters are dropped, so the position restarts at zero.
    queue = seq + unread;
    pos = 0;
}
/**
* Tests if the next characters on the queue match the sequence. Case insensitive.
* @param seq String to check queue for.
* @return true if the next characters match.
*/
public boolean matches(String seq) {
    final boolean ignoreCase = true;
    final int length = seq.length();
    // Compare the whole of seq against the queue, starting at the current position.
    return queue.regionMatches(ignoreCase, pos, seq, 0, length);
}
/**
* Case sensitive match test.
* @param seq string to case sensitively check for
* @return true if matched, false if not
*/
public boolean matchesCS(String seq) {
    // startsWith with an offset performs the exact-case comparison at the read position.
    final boolean atHead = queue.startsWith(seq, pos);
    return atHead;
}
/**
Tests if the next characters match any of the sequences. Case insensitive.
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
@param seq list of strings to case insensitively check for
@return true if any matched, false if none did
*/
public boolean matchesAny(String... seq) {
    // Candidates are tried in the order given; the first hit wins.
    for (int i = 0; i < seq.length; i++) {
        if (matches(seq[i])) {
            return true;
        }
    }
    return false;
}
/**
 * Tests if the character at the current position equals any of the supplied characters.
 * The comparison is exact (no case folding).
 * @param seq candidate characters
 * @return true if the next character is one of the candidates; false if none match or the queue is empty
 */
public boolean matchesAny(char... seq) {
    if (isEmpty()) {
        return false;
    }
    for (int i = 0; i < seq.length; i++) {
        if (seq[i] == queue.charAt(pos)) {
            return true;
        }
    }
    return false;
}
/**
 * Tests if the queue starts with '<' immediately followed by a letter.
 * @return true if at least two characters remain and they have that shape
 */
public boolean matchesStartTag() {
    // micro opt for matching "<x"
    if (remainingLength() < 2) {
        return false;
    }
    if (queue.charAt(pos) != '<') {
        return false;
    }
    return Character.isLetter(queue.charAt(pos + 1));
}
/**
* Tests if the queue matches the sequence (as with match), and if they do, removes the matched string from the
* queue.
* @param seq String to search for, and if found, remove from queue.
* @return true if found and removed, false if not found.
*/
public boolean matchChomp(String seq) {
    if (!matches(seq)) {
        return false;
    }
    // Matched: step over the sequence.
    pos += seq.length();
    return true;
}
/**
Tests if queue starts with a whitespace character.
@return if starts with whitespace
*/
public boolean matchesWhitespace() {
    if (isEmpty()) {
        return false;
    }
    return StringUtil.isWhitespace(queue.charAt(pos));
}
/**
Test if the queue matches a word character (letter or digit).
@return if matches a word character
*/
public boolean matchesWord() {
    if (isEmpty()) {
        return false;
    }
    return Character.isLetterOrDigit(queue.charAt(pos));
}
/**
* Drops the next character off the queue.
*/
public void advance() {
    if (isEmpty()) {
        return; // nothing to drop
    }
    pos++;
}
/**
* Consume one character off queue.
* @return first character on queue.
*/
public char consume() {
    // The position is advanced before the character is read, exactly as with charAt(pos++).
    final int at = pos++;
    return queue.charAt(at);
}
/**
* Consumes the supplied sequence of the queue. If the queue does not start with the supplied sequence, will
* throw an illegal state exception -- but you should be running match() against that condition.
<p>
Case insensitive.
* @param seq sequence
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> to remove from head of queue.
*/
public void consume(String seq) {
    final boolean atHead = matches(seq);
    if (!atHead) {
        throw new IllegalStateException("Queue did not match expected sequence");
    }
    final int needed = seq.length();
    if (needed > remainingLength()) {
        throw new IllegalStateException("Queue not long enough to consume sequence");
    }
    // Both checks passed: step over the sequence.
    pos += needed;
}
/**
* Pulls a string off the queue, up to but exclusive of the match sequence, or to the queue running out.
* @param seq String to end on (and not include in return, but leave on queue). <b>Case sensitive.</b>
* @return The matched data consumed from queue.
*/
public String consumeTo(String seq) {
    final int found = queue.indexOf(seq, pos);
    if (found == -1) {
        // Terminator absent: hand back whatever remainder() yields.
        return remainder();
    }
    final String head = queue.substring(pos, found);
    pos += head.length(); // the terminator itself stays on the queue
    return head;
}
/**
 * Pulls a string off the queue up to, but not including, the first case-insensitive occurrence
 * of seq, or to the end of the queue if seq never occurs. The terminator is left on the queue.
 * @param seq terminator to stop at; its first character is read unconditionally
 * @return the data consumed from the queue
 */
public String consumeToIgnoreCase(String seq) {
    final int begin = pos;
    final String firstChar = seq.substring(0, 1);
    // if first is not cased, use index of
    final boolean canScan = firstChar.toLowerCase().equals(firstChar.toUpperCase());
    while (!isEmpty() && !matches(seq)) {
        if (!canScan) {
            pos++;
            continue;
        }
        final int next = queue.indexOf(firstChar, pos);
        if (next == pos) {
            // this char is the skip char, but not match, so force advance of pos
            pos++;
        } else if (next < pos) {
            // no chance of finding, grab to end
            pos = queue.length();
        } else {
            pos = next;
        }
    }
    return queue.substring(begin, pos);
}
/**
Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue.
@param seq any number of terminators to consume to. <b>Case insensitive.</b>
@return consumed string
*/
// todo: method name. not good that consumeTo cares for case, and consume to any doesn't. And the only use for this
// is a case sensitive time...
public String consumeToAny(String... seq) {
int start = pos;
while (!isEmpty
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>() && !matchesAny(seq)) {
pos++;
}
String data = queue.substring(start, pos);
return data;
}
/**
* Pulls a string off the queue (like consumeTo), and then pulls off the matched string (but does not return it).
* <p>
* If the queue runs out of characters before finding the seq, will return as much as it can (and queue will go
* isEmpty() == true).
* @param seq String to match up to, and not include in return, and to pull off queue. <b>Case sensitive.</b>
* @return Data matched from queue.
*/
public String chompTo(String seq) {
    final String head = consumeTo(seq);
    // Drop the terminator if present; the boolean result is deliberately ignored.
    matchChomp(seq);
    return head;
}
/**
 * Like chompTo, but scans for the terminator with consumeToIgnoreCase before removing it.
 * @param seq terminator to match up to and pull off the queue
 * @return data consumed ahead of the terminator
 */
public String chompToIgnoreCase(String seq) {
    final String head = consumeToIgnoreCase(seq); // case insensitive scan
    // Drop the terminator if present; the boolean result is deliberately ignored.
    matchChomp(seq);
    return head;
}
/**
* Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
* and leave " four" on the queue. Unbalanced openers and closers can be escaped (with \). Those escapes will be left
* in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
* contains text strings; use unescape for that.
* @param open opener
* @param close closer
* @return data matched from the queue
*/
public String chompBalanced(char open, char close) {
    final StringBuilder balanced = new StringBuilder();
    int depth = 0;
    char previous = 0;
    while (!isEmpty()) {
        final char current = consume();
        // An opener or closer right after the escape char does not change the nesting depth.
        if (previous != ESC) {
            if (current == open) {
                depth++;
            } else if (current == close) {
                depth--;
            }
        }
        if (depth > 0 && previous != 0) {
            balanced.append(current); // don't include the outer match pair in the return
        }
        previous = current;
        if (depth <= 0) {
            break; // outer pair closed (or never opened)
        }
    }
    return balanced.toString();
}
/**
* Unescape a \ escaped string.
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
* @param in backslash escaped string
* @return unescaped string
*/
public static String unescape(String in) {
    final StringBuilder out = new StringBuilder(in.length());
    char last = 0;
    for (char c : in.toCharArray()) {
        if (c == ESC) {
            if (last == ESC) {
                // Second half of an escaped backslash: emit one literal backslash.
                out.append(c);
                // Reset the pair state so a following backslash starts a NEW escape instead of
                // being mistaken for the second half of another pair.
                c = 0;
            }
            // Otherwise this backslash opens an escape and is dropped; the escaped character is
            // emitted on the next iteration. A lone trailing backslash is dropped as well.
        } else {
            out.append(c);
        }
        last = c;
    }
    return out.toString();
}
/**
* Pulls the next run of whitespace characters off the queue.
*/
public boolean consumeWhitespace() {
    final int begin = pos;
    while (matchesWhitespace()) {
        pos++;
    }
    // True exactly when at least one whitespace character was stepped over.
    return pos > begin;
}
/**
* Retrieves the next run of word type (letter or digit) off the queue.
* @return String of word characters from queue, or empty string if none.
*/
public String consumeWord() {
    final int begin = pos;
    while (matchesWord()) {
        pos++;
    }
    final String word = queue.substring(begin, pos);
    return word;
}
/**
* Consume a tag name off the queue (word or :, _, -)
*
* @return tag name
*/
public String consumeTagName() {
    final int begin = pos;
    while (!isEmpty()) {
        final boolean tagChar = matchesWord() || matchesAny(':', '_', '-');
        if (!tagChar) {
            break;
        }
        pos++;
    }
    return queue.substring(begin, pos);
}
/**
* Consume a CSS element selector (tag name, but | instead of : for namespaces, to not conflict with :pseudo selects).
*
* @return tag name
*/
public String consumeElementSelector() {
    final int begin = pos;
    while (!isEmpty()) {
        final boolean selectorChar = matchesWord() || matchesAny('|', '_', '-');
        if (!selectorChar) {
            break;
        }
        pos++;
    }
    return queue.substring(begin, pos);
}
/**
Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
@return identifier
*/
public String consumeCssIdentifier() {
    final int begin = pos;
    while (!isEmpty()) {
        final boolean identifierChar = matchesWord() || matchesAny('-', '_');
        if (!identifierChar) {
            break;
        }
        pos++;
    }
    return queue.substring(begin, pos);
}
/**
Consume an attribute key off the
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> queue (letter, digit, -, _, :")
@return attribute key
*/
public String consumeAttributeKey() {
    final int begin = pos;
    // Advance while the queue still has input and the current character is a
    // word character or one of '-', '_', ':'.
    while (!isEmpty()) {
        final boolean keyChar = matchesWord() || matchesAny('-', '_', ':');
        if (!keyChar)
            break;
        pos += 1;
    }
    return queue.substring(begin, pos);
}
/**
 Consume and return everything that is left on the queue.
 @return remainder of the queue; an empty string if nothing is left.
 */
public String remainder() {
    final StringBuilder rest = new StringBuilder();
    // drain the queue one consume() call at a time until it reports empty
    while (!isEmpty())
        rest.append(consume());
    return rest.toString();
}
/**
 Returns the part of the queue from the current position onwards, without consuming it.
 */
public String toString() {
    final String unconsumed = queue.substring(pos);
    return unconsumed;
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.*;
import java.util.Iterator;
/**
* @author Jonathan Hedley
*/
public class XmlTreeBuilder extends TreeBuilder {
/**
 * Runs the superclass parse initialisation, then adds the document to the stack.
 * @param input passed through to the superclass initialisation
 * @param baseUri passed through to the superclass initialisation
 * @param errors passed through to the superclass initialisation
 */
@Override
protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
super.initialiseParse(input, baseUri, errors);
stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack)
}
/**
 * Handles one token by dispatching on {@code token.type}.
 * Start tags, comments, characters and doctypes are passed to the matching {@code insert}
 * overload; end tags go to {@code popStackToClose}; EOF does nothing; any other type is
 * reported through {@code Validate.fail}.
 * @param token the token to handle
 * @return true whenever the method returns normally
 */
@Override
protected boolean process(Token token) {
// start tag, end tag, doctype, comment, character, eof
switch (token.type) {
case StartTag:
insert(token.asStartTag());
break;
case EndTag:
popStackToClose(token.asEndTag());
break;
case Comment:
insert(token.asComment());
break;
case Character:
insert(token.asCharacter());
break;
case Doctype:
insert(token.asDoctype());
break;
case EOF: // could put some normalisation here if desired
break;
default:
// none of the token types listed above matched
Validate.fail("Unexpected token type: " + token.type);
}
return true;
}
/**
 * Appends the node as a child of the element returned by {@code currentElement()}.
 * @param node the node to append
 */
private void insertNode(Node node) {
currentElement().appendChild(node);
}
/**
 * Creates an element for the start tag and appends it to the current element.
 * A tag that is not self closing is also added to the stack. A self closing tag has its
 * flag acknowledged on the tokeniser instead.
 * @param startTag the start tag token
 * @return the newly created element
 */
Element insert(Token.StartTag startTag) {
    final Tag elementTag = Tag.valueOf(startTag.name());
    // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
    final Element element = new Element(elementTag, baseUri, startTag.attributes);
    insertNode(element);

    if (!startTag.isSelfClosing()) {
        stack.add(element);
        return element;
    }

    tokeniser.acknowledgeSelfClosingFlag();
    // unknown tag, remember this is self closing for output. see above.
    if (!elementTag.isKnownTag())
        elementTag.setSelfClosing();
    return element;
}
/**
 * Builds a Comment node from the token's data and appends it to the current element.
 * @param commentToken the comment token
 */
void insert(Token.Comment commentToken) {
    insertNode(new Comment(commentToken.getData(), baseUri));
}
void insert(Token.Character characterToken) {
Node node = new TextNode(characterToken.getData(), baseUri);
insert
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.nodes;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
/**
 * A {@code <!DOCTYPE>} node.
 */
public class DocumentType extends Node {
    // todo: quirk mode from publicId and systemId

    /**
     * Create a new doctype element. The name and both IDs are stored as attributes
     * ("name", "publicId", "systemId") on the node.
     * @param name the doctype's name; validated with {@code Validate.notEmpty}
     * @param publicId the doctype's public ID
     * @param systemId the doctype's system ID
     * @param baseUri the doctype's base URI
     */
    public DocumentType(String name, String publicId, String systemId, String baseUri) {
        super(baseUri);

        Validate.notEmpty(name);
        attr("name", name);
        attr("publicId", publicId);
        attr("systemId", systemId);
    }

    @Override
    public String nodeName() {
        return "#doctype";
    }

    /**
     * Writes the doctype declaration. The PUBLIC identifier and the system identifier
     * are each written only when not blank.
     */
    @Override
    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
        accum.append("<!DOCTYPE ");
        accum.append(attr("name"));

        final String publicId = attr("publicId");
        if (!StringUtil.isBlank(publicId)) {
            accum.append(" PUBLIC \"");
            accum.append(publicId);
            accum.append("\"");
        }

        final String systemId = attr("systemId");
        if (!StringUtil.isBlank(systemId)) {
            accum.append(" \"");
            accum.append(systemId);
            accum.append("\"");
        }

        accum.append('>');
    }

    @Override
    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
        // nothing to write after the head
    }
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
/**
* Parse tokens for the Tokeniser.
*/
abstract class Token {
TokenType type;
// Private constructor: only code inside this top-level class (including its nested
// token classes) can create or extend Token.
private Token() {
}
/**
 * Gets the simple class name of this token's runtime class.
 * @return the simple name of the concrete token class
 */
String tokenType() {
    final Class<?> runtimeClass = getClass();
    return runtimeClass.getSimpleName();
}
/**
 * Doctype token. The name and the public and system identifiers are held in
 * StringBuilders that are exposed as package-visible fields.
 */
static class Doctype extends Token {
    final StringBuilder name = new StringBuilder();
    final StringBuilder publicIdentifier = new StringBuilder();
    final StringBuilder systemIdentifier = new StringBuilder();
    boolean forceQuirks = false;

    Doctype() {
        this.type = TokenType.Doctype;
    }

    /** @return the current contents of the name buffer */
    String getName() {
        return this.name.toString();
    }

    /** @return the current contents of the public identifier buffer */
    String getPublicIdentifier() {
        return this.publicIdentifier.toString();
    }

    /** @return the current contents of the system identifier buffer */
    public String getSystemIdentifier() {
        return this.systemIdentifier.toString();
    }

    /** @return the value of the forceQuirks flag */
    public boolean isForceQuirks() {
        return this.forceQuirks;
    }
}
static abstract class Tag extends Token {
protected String tagName;
private String pendingAttributeName; // attribute names are generally caught in one hop, not accumulated
private StringBuilder pendingAttributeValue; // but values are accumulated, from e.g. & in hrefs
boolean selfClosing = false;
Attributes attributes; // start tags get attributes on construction. End tags get attributes on first new attribute (but only for parser convenience, not used).
/**
 * Commits the pending attribute (if there is a pending name) to the attribute set, then
 * clears the pending name and empties the pending value buffer. The attribute set is
 * created on first use. A pending name with no value buffer gets the empty string.
 */
void newAttribute() {
    if (attributes == null)
        attributes = new Attributes();

    if (pendingAttributeName != null) {
        final String value = pendingAttributeValue == null ? "" : pendingAttributeValue.toString();
        attributes.put(new Attribute(pendingAttributeName, value));
    }

    pendingAttributeName = null;
    // keep the buffer instance for reuse, just drop its contents
    if (pendingAttributeValue != null)
        pendingAttributeValue.setLength(0);
}
/**
 * Finalises the tag for emit: commits any attribute that is still pending.
 */
void finaliseTag() {
    if (pendingAttributeName == null)
        return; // nothing pending
    // todo: check if attribute name exists; if so, drop and error
    newAttribute();
}
/**
 * Gets the tag name, after checking through {@code Validate.isFalse} that it is not
 * zero-length.
 * @return the tag name
 */
String name() {
    final boolean nameIsEmpty = tagName.length() == 0;
    Validate.isFalse(nameIsEmpty);
    return tagName;
}
/**
 * Replaces the tag name.
 * @param name the new tag name
 * @return this tag, for chaining
 */
Tag name(String name) {
    this.tagName = name;
    return this;
}
/**
 * @return the value of the selfClosing flag
 */
boolean isSelfClosing() {
    return this.selfClosing;
}
@
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>SuppressWarnings({"TypeMayBeWeakened"})
Attributes getAttributes() {
    // hands back the attribute set itself (may be null if none was ever created)
    return this.attributes;
}
// these appenders are rarely hit in not null state-- caused by null chars.
void appendTagName(String append) {
    // first fragment becomes the name; later fragments are concatenated onto it
    if (tagName == null)
        tagName = append;
    else
        tagName = tagName.concat(append);
}
void appendTagName(char append) {
    // delegate to the String overload
    final String asString = String.valueOf(append);
    appendTagName(asString);
}
void appendAttributeName(String append) {
    // first fragment becomes the pending name; later fragments are concatenated onto it
    if (pendingAttributeName == null)
        pendingAttributeName = append;
    else
        pendingAttributeName = pendingAttributeName.concat(append);
}
void appendAttributeName(char append) {
    // delegate to the String overload
    final String asString = String.valueOf(append);
    appendAttributeName(asString);
}
void appendAttributeValue(String append) {
    // create the value buffer on first use, otherwise extend the existing one
    if (pendingAttributeValue == null)
        pendingAttributeValue = new StringBuilder(append);
    else
        pendingAttributeValue.append(append);
}
void appendAttributeValue(char append) {
    // delegate to the String overload
    final String asString = String.valueOf(append);
    appendAttributeValue(asString);
}
}
/**
 * Start tag token. Always constructed with a non-null attribute set.
 */
static class StartTag extends Tag {
    StartTag() {
        this.attributes = new Attributes();
        this.type = TokenType.StartTag;
    }

    StartTag(String name) {
        this();
        tagName = name;
    }

    StartTag(String name, Attributes attributes) {
        this();
        tagName = name;
        this.attributes = attributes;
    }

    /**
     * Renders the tag as {@code <name>}, or {@code <name attributes>} when the
     * attribute set is present and not empty.
     */
    @Override
    public String toString() {
        final boolean hasAttributes = attributes != null && attributes.size() > 0;
        if (!hasAttributes)
            return "<" + name() + ">";
        return "<" + name() + " " + attributes.toString() + ">";
    }
}
/**
 * End tag token.
 */
static class EndTag extends Tag {
    EndTag() {
        this.type = TokenType.EndTag;
    }

    EndTag(String name) {
        this();
        tagName = name;
    }

    /** Renders the tag as {@code </name>}. */
    @Override
    public String toString() {
        final StringBuilder rendered = new StringBuilder("</");
        rendered.append(name());
        rendered.append(">");
        return rendered.toString();
    }
}
/**
 * Comment token. The comment text is held in the package-visible {@code data} buffer.
 */
static class Comment extends Token {
    final StringBuilder data = new StringBuilder();

    Comment() {
        this.type = TokenType.Comment;
    }

    /** @return the current contents of the data buffer */
    String getData() {
        return this.data.toString();
    }

    /** Renders the comment as {@code <!--data-->}. */
    @Override
    public String toString() {
        final String text = getData();
        return "<!--" + text + "-->";
    }
}
static class Character extends Token {
private final String data;
Character(String data) {
type = TokenType.Character;
this.data = data;
}
String getData() {
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> element's value in each of the matched elements.
* @param value The value to set into each matched element
* @return this (for chaining)
*/
public Elements val(String value) {
    // apply the same value to every element in this list
    for (Element each : contents) {
        each.val(value);
    }
    return this;
}
/**
 * Get the text of every matched element, joined with single spaces.
 * <p>
 * Repeats are possible when the matched elements include both a parent and its own children,
 * because Element.text() returns the combined text of a parent and all its children.
 * @return string of all text: unescaped and no HTML.
 * @see Element#text()
 */
public String text() {
    final StringBuilder combined = new StringBuilder();
    for (Element each : contents) {
        // separator goes before every entry except the first non-empty start
        if (combined.length() > 0)
            combined.append(" ");
        combined.append(each.text());
    }
    return combined.toString();
}
/**
 * Test whether any matched element reports having text.
 * @return true as soon as one element's hasText() is true; false if none is (or the list is empty)
 */
public boolean hasText() {
    for (Element each : contents) {
        if (!each.hasText())
            continue;
        return true;
    }
    return false;
}
/**
 * Get the inner HTML of every matched element, joined with newlines.
 * @return string of all element's inner HTML.
 * @see #text()
 * @see #outerHtml()
 */
public String html() {
    final StringBuilder combined = new StringBuilder();
    for (Element each : contents) {
        if (combined.length() > 0)
            combined.append("\n");
        combined.append(each.html());
    }
    return combined.toString();
}
/**
 * Get the outer HTML of every matched element, joined with newlines.
 * @return string of all element's outer HTML.
 * @see #text()
 * @see #html()
 */
public String outerHtml() {
    final StringBuilder combined = new StringBuilder();
    for (Element each : contents) {
        if (combined.length() > 0)
            combined.append("\n");
        combined.append(each.outerHtml());
    }
    return combined.toString();
}
/**
* Get the combined outer HTML of all matched elements. Alias of {@link #outerHtml()}.
* @return string of all element's outer HTML.
* @see #text()
* @see #html()
*/
public String toString() {
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>(int)} to retrieve an Element.
* @param index the (zero-based) index of the element in the list to retain
* @return Elements containing only the specified element, or, if that element did not exist, an empty list.
*/
public Elements eq(int index) {
    // an index at or beyond the end of the list yields an empty Elements
    if (contents.size() > index) {
        return new Elements(get(index));
    } else {
        return new Elements();
    }
}
/**
 * Test whether running the supplied query through {@code select} yields any result.
 * @param query A selector
 * @return true if the selection for the query is not empty.
 */
public boolean is(String query) {
    return !select(query).isEmpty();
}
/**
 * Get all of the parents and ancestor elements of the matched elements.
 * Results are gathered in a LinkedHashSet, so duplicates are dropped and first-seen order is kept.
 * @return all of the parents and ancestor elements of the matched elements
 */
public Elements parents() {
    final HashSet<Element> ancestors = new LinkedHashSet<Element>();
    for (Element each : contents)
        ancestors.addAll(each.parents());
    return new Elements(ancestors);
}
// list-like methods
/**
 Get the first matched element.
 @return The first matched element, or <code>null</code> if contents is empty.
 */
public Element first() {
    if (contents.isEmpty())
        return null;
    return contents.get(0);
}
/**
 Get the last matched element.
 @return The last matched element, or <code>null</code> if contents is empty.
 */
public Element last() {
    if (contents.isEmpty())
        return null;
    final int lastIndex = contents.size() - 1;
    return contents.get(lastIndex);
}
/**
 * Perform a depth-first traversal on each of the selected elements.
 * One NodeTraversor is built around the visitor and reused for every element.
 * @param nodeVisitor the visitor callbacks to perform on each node; checked with {@code Validate.notNull}
 * @return this, for chaining
 */
public Elements traverse(NodeVisitor nodeVisitor) {
    Validate.notNull(nodeVisitor);
    final NodeTraversor walker = new NodeTraversor(nodeVisitor);
    for (Element root : contents)
        walker.traverse(root);
    return this;
}
// implements List<Element> delegates:
/** Delegates to {@code contents.size()}. */
public int size() {return contents.size();}
/** Delegates to {@code contents.isEmpty()}. */
public boolean isEmpty() {return contents.isEmpty();}
/** Delegates to {@code contents.contains(o)}. */
public boolean contains(Object o) {return contents.contains(o);}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import java.util.ArrayList;
/**
 * A container for ParseErrors, carrying a cap on how many errors are meant to be kept.
 * <p>The cap is only reported via {@link #canAddError()}; this class does not itself block additions beyond it.</p>
 *
 * @author Jonathan Hedley
 */
class ParseErrorList extends ArrayList<ParseError>{
    private static final int INITIAL_CAPACITY = 16;
    private final int maxSize;

    /**
     * @param initialCapacity initial capacity handed to the backing ArrayList
     * @param maxSize the number of errors at which {@link #canAddError()} starts returning false
     */
    ParseErrorList(int initialCapacity, int maxSize) {
        super(initialCapacity);
        this.maxSize = maxSize;
    }

    /**
     * Create a list that never reports room for an error (capacity 0, max size 0).
     * @return a new list for which {@link #canAddError()} is false while it is empty
     */
    static ParseErrorList noTracking() {
        return new ParseErrorList(0, 0);
    }

    /**
     * Create a list that reports room for errors until {@code maxSize} entries are held.
     * @param maxSize the error cap
     * @return a new list with the default initial capacity
     */
    static ParseErrorList tracking(int maxSize) {
        return new ParseErrorList(INITIAL_CAPACITY, maxSize);
    }

    /**
     * @return true while the current size is below the configured max size
     */
    boolean canAddError() {
        return maxSize > size();
    }

    /**
     * @return the max size this list was constructed with
     */
    int getMaxSize() {
        return maxSize;
    }
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
/**
* States and transition activations for the Tokeniser.
*/
enum TokeniserState {
Data {
// in data state, gather characters until a character reference or tag is found
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
case '&':
t.advanceTransition(CharacterReferenceInData);
break;
case '<':
t.advanceTransition(TagOpen);
break;
case nullChar:
t.error(this); // NOT replacement character (oddly?)
t.emit(r.consume());
break;
case eof:
t.emit(new Token.EOF());
break;
default:
String data = r.consumeToAny('&', '<', nullChar);
t.emit(data);
break;
}
}
},
CharacterReferenceInData {
    // from & in data
    void read(Tokeniser t, CharacterReader r) {
        Character ref = t.consumeCharacterReference(null, false);
        if (ref != null)
            t.emit(ref);
        else
            t.emit('&'); // no reference was consumed, so emit the ampersand itself
        t.transition(Data);
    }
},
Rcdata {
/// handles data in title, textarea etc
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
case '&':
t.advanceTransition(CharacterReferenceInRcdata);
break;
case '<':
t.advanceTransition(RcdataLessthanSign);
break;
case nullChar:
t.error(this);
r.advance();
t.emit(replacementChar);
break;
case eof:
t.emit(new Token.EOF());
break;
default:
String data = r.consumeToAny('&', '<', nullChar);
t.emit(data);
break;
}
}
},
CharacterReferenceInRcdata {
    // reached from Rcdata on '&'; always returns to Rcdata
    void read(Tokeniser t, CharacterReader r) {
        Character ref = t.consumeCharacterReference(null, false);
        if (ref != null)
            t.emit(ref);
        else
            t.emit('&'); // no reference was consumed, so emit the ampersand itself
        t.transition(Rcdata);
    }
},
Rawtext {
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
case '<':
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> t.advanceTransition(RawtextLessthanSign);
break;
case nullChar:
t.error(this);
r.advance();
t.emit(replacementChar);
break;
case eof:
t.emit(new Token.EOF());
break;
default:
String data = r.consumeToAny('<', nullChar);
t.emit(data);
break;
}
}
},
ScriptData {
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
case '<':
t.advanceTransition(ScriptDataLessthanSign);
break;
case nullChar:
t.error(this);
r.advance();
t.emit(replacementChar);
break;
case eof:
t.emit(new Token.EOF());
break;
default:
String data = r.consumeToAny('<', nullChar);
t.emit(data);
break;
}
}
},
PLAINTEXT {
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
case nullChar:
t.error(this);
r.advance();
t.emit(replacementChar);
break;
case eof:
t.emit(new Token.EOF());
break;
default:
String data = r.consumeTo(nullChar);
t.emit(data);
break;
}
}
},
TagOpen {
// from < in data
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
case '!':
t.advanceTransition(MarkupDeclarationOpen);
break;
case '/':
t.advanceTransition(EndTagOpen);
break;
case '?':
t.advanceTransition(BogusComment);
break;
default:
if (r.matchesLetter()) {
t.createTagPending(true);
t.transition(TagName);
} else {
t.error(this);
t.emit('<'); // char that got us here
t.transition(Data);
}
break;
}
}
},
EndTagOpen {
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
t.emit
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>("</");
t.transition(Data);
} else if (r.matchesLetter()) {
t.createTagPending(false);
t.transition(TagName);
} else if (r.matches('>')) {
t.error(this);
t.advanceTransition(Data);
} else {
t.error(this);
t.advanceTransition(BogusComment);
}
}
},
TagName {
// from < or </ in data, will have start or end tag pending
void read(Tokeniser t, CharacterReader r) {
// previous TagOpen state did NOT consume, will have a letter char in current
String tagName = r.consumeToAny('\t', '\n', '\r', '\f', ' ', '/', '>', nullChar).toLowerCase();
t.tagPending.appendTagName(tagName);
switch (r.consume()) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(BeforeAttributeName);
break;
case '/':
t.transition(SelfClosingStartTag);
break;
case '>':
t.emitTagPending();
t.transition(Data);
break;
case nullChar: // replacement
t.tagPending.appendTagName(replacementStr);
break;
case eof: // should emit pending tag?
t.eofError(this);
t.transition(Data);
// no default, as covered with above consumeToAny
}
}
},
RcdataLessthanSign {
// from < in rcdata
void read(Tokeniser t, CharacterReader r) {
if (r.matches('/')) {
t.createTempBuffer();
t.advanceTransition(RCDATAEndTagOpen);
} else if (r.matchesLetter() && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) {
// diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
// consuming to EOF; break out here
t.tagPending = new Token.EndTag(t.appropriateEndTagName());
t.emitTagPending();
r.unconsume(); // undo "<"
t.transition(Data);
} else {
t.emit("<");
t.transition(Rcdata);
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
}
},
RCDATAEndTagOpen {
    // decides whether the text just read opens an end tag name, or was plain "</" text
    void read(Tokeniser t, CharacterReader r) {
        if (!r.matchesLetter()) {
            // not a tag name: hand the "</" back as text and resume Rcdata
            t.emit("</");
            t.transition(Rcdata);
            return;
        }
        // start an end tag; its first letter goes (lower-cased) into both the tag name and the data buffer
        t.createTagPending(false);
        t.tagPending.appendTagName(Character.toLowerCase(r.current()));
        t.dataBuffer.append(Character.toLowerCase(r.current()));
        t.advanceTransition(RCDATAEndTagName);
    }
},
RCDATAEndTagName {
    // Accumulates the name of a candidate end tag. Letters are appended to the pending tag name (lower-cased)
    // and to the data buffer (as read). The first non-letter character decides the outcome: if the pending tag
    // is the appropriate end tag, that character selects the next state; otherwise the candidate was only text.
    void read(Tokeniser t, CharacterReader r) {
        if (r.matchesLetter()) {
            String name = r.consumeLetterSequence();
            t.tagPending.appendTagName(name.toLowerCase());
            t.dataBuffer.append(name);
            return;
        }

        // the terminating character is consumed here; anythingElse() puts it back when it turns out to be text
        char c = r.consume();
        switch (c) {
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
                if (t.isAppropriateEndTagToken())
                    t.transition(BeforeAttributeName);
                else
                    anythingElse(t, r);
                break;
            case '/':
                if (t.isAppropriateEndTagToken())
                    t.transition(SelfClosingStartTag);
                else
                    anythingElse(t, r);
                break;
            case '>':
                if (t.isAppropriateEndTagToken()) {
                    t.emitTagPending();
                    t.transition(Data);
                }
                else
                    anythingElse(t, r);
                break;
            default:
                anythingElse(t, r);
        }
    }

    // The candidate was not an end tag: emit "</" plus the buffered name as text and return to Rcdata.
    // Every caller has already consumed one character, so it is un-consumed here to be read again in Rcdata.
    // Without this, that character (e.g. the '>' of an unmatched "</foo>") was silently dropped from the output.
    private void anythingElse(Tokeniser t, CharacterReader r) {
        t.emit("</" + t.dataBuffer.toString());
        r.unconsume();
        t.transition(Rcdata);
    }
},
RawtextLessthanSign {
    // decides whether a '/' follows (a possible end tag) or the '<' was just text
    void read(Tokeniser t, CharacterReader r) {
        if (!r.matches('/')) {
            t.emit('<');
            t.transition(Rawtext);
            return;
        }
        t.createTempBuffer();
        t.advanceTransition(RawtextEndTagOpen);
    }
},
RawtextEndTagOpen {
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createTagPending(false);
t.transition(RawtextEndTagName);
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
} else {
t.emit("</");
t.transition(Rawtext);
}
}
},
RawtextEndTagName {
    // Accumulates a candidate end tag name; letters go to the pending tag name (lower-cased) and the data buffer.
    void read(Tokeniser t, CharacterReader r) {
        if (r.matchesLetter()) {
            String letters = r.consumeLetterSequence();
            t.tagPending.appendTagName(letters.toLowerCase());
            t.dataBuffer.append(letters);
            return;
        }

        // not the appropriate end tag, or nothing left to read: treat what was gathered as text
        if (!t.isAppropriateEndTagToken() || r.isEmpty()) {
            anythingElse(t, r);
            return;
        }

        char next = r.consume();
        switch (next) {
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
                t.transition(BeforeAttributeName);
                break;
            case '/':
                t.transition(SelfClosingStartTag);
                break;
            case '>':
                t.emitTagPending();
                t.transition(Data);
                break;
            default:
                // keep the consumed character by adding it to the buffer before the buffer is emitted
                t.dataBuffer.append(next);
                anythingElse(t, r);
        }
    }

    // emit "</" plus the buffered characters as text, and go back to Rawtext
    private void anythingElse(Tokeniser t, CharacterReader r) {
        t.emit("</" + t.dataBuffer.toString());
        t.transition(Rawtext);
    }
},
ScriptDataLessthanSign {
void read(Tokeniser t, CharacterReader r) {
switch (r.consume()) {
case '/':
t.createTempBuffer();
t.transition(ScriptDataEndTagOpen);
break;
case '!':
t.emit("<!");
t.transition(ScriptDataEscapeStart);
break;
default:
t.emit("<");
r.unconsume();
t.transition(ScriptData);
}
}
},
ScriptDataEndTagOpen {
    // a letter starts an end tag name; anything else means the "</" was just text
    void read(Tokeniser t, CharacterReader r) {
        if (!r.matchesLetter()) {
            t.emit("</");
            t.transition(ScriptData);
            return;
        }
        t.createTagPending(false);
        t.transition(ScriptDataEndTagName);
    }
},
ScriptDataEndTagName {
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
t.tagPending.appendTagName(name.toLowerCase());
t.dataBuffer.
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>append(name);
return;
}
if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(BeforeAttributeName);
break;
case '/':
t.transition(SelfClosingStartTag);
break;
case '>':
t.emitTagPending();
t.transition(Data);
break;
default:
t.dataBuffer.append(c);
anythingElse(t, r);
}
} else {
anythingElse(t, r);
}
}
private void anythingElse(Tokeniser t, CharacterReader r) {
t.emit("</" + t.dataBuffer.toString());
t.transition(ScriptData);
}
},
ScriptDataEscapeStart {
    // a '-' is emitted and moves on to ScriptDataEscapeStartDash; anything else returns to ScriptData
    void read(Tokeniser t, CharacterReader r) {
        if (!r.matches('-')) {
            t.transition(ScriptData);
            return;
        }
        t.emit('-');
        t.advanceTransition(ScriptDataEscapeStartDash);
    }
},
ScriptDataEscapeStartDash {
    // a second '-' is emitted and moves on to ScriptDataEscapedDashDash; anything else returns to ScriptData
    void read(Tokeniser t, CharacterReader r) {
        if (!r.matches('-')) {
            t.transition(ScriptData);
            return;
        }
        t.emit('-');
        t.advanceTransition(ScriptDataEscapedDashDash);
    }
},
ScriptDataEscaped {
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
t.transition(Data);
return;
}
switch (r.current()) {
case '-':
t.emit('-');
t.advanceTransition(ScriptDataEscapedDash);
break;
case '<':
t.advanceTransition(ScriptDataEscapedLessthanSign);
break;
case nullChar:
t.error(this);
r.advance();
t.emit(replacementChar);
break;
default:
String data = r.consumeToAny('-', '<', nullChar);
t.emit(data);
}
}
},
ScriptDataEscapedDash {
void read(Tokeniser t, CharacterReader r) {
if (
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>r.isEmpty()) {
t.eofError(this);
t.transition(Data);
return;
}
char c = r.consume();
switch (c) {
case '-':
t.emit(c);
t.transition(ScriptDataEscapedDashDash);
break;
case '<':
t.transition(ScriptDataEscapedLessthanSign);
break;
case nullChar:
t.error(this);
t.emit(replacementChar);
t.transition(ScriptDataEscaped);
break;
default:
t.emit(c);
t.transition(ScriptDataEscaped);
}
}
},
ScriptDataEscapedDashDash {
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
t.transition(Data);
return;
}
char c = r.consume();
switch (c) {
case '-':
t.emit(c);
break;
case '<':
t.transition(ScriptDataEscapedLessthanSign);
break;
case '>':
t.emit(c);
t.transition(ScriptData);
break;
case nullChar:
t.error(this);
t.emit(replacementChar);
t.transition(ScriptDataEscaped);
break;
default:
t.emit(c);
t.transition(ScriptDataEscaped);
}
}
},
ScriptDataEscapedLessthanSign {
    /**
     * Branches on the reader's current character without consuming it here.
     * A letter: createTempBuffer() is called, the lower-cased letter is appended to dataBuffer,
     * "<" plus the letter is emitted, and the tokeniser advances into ScriptDataDoubleEscapeStart.
     * A '/': createTempBuffer() is called and the tokeniser advances into ScriptDataEscapedEndTagOpen.
     * Anything else: a literal '<' is emitted and the state becomes ScriptDataEscaped.
     */
    void read(Tokeniser t, CharacterReader r) {
        if (r.matchesLetter()) {
            t.createTempBuffer();
            t.dataBuffer.append(Character.toLowerCase(r.current()));
            t.emit("<" + r.current());
            t.advanceTransition(ScriptDataDoubleEscapeStart);
            return;
        }
        if (r.matches('/')) {
            t.createTempBuffer();
            t.advanceTransition(ScriptDataEscapedEndTagOpen);
            return;
        }
        t.emit('<');
        t.transition(ScriptDataEscaped);
    }
},
ScriptDataEscapedEndTagOpen {
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createTagPending(false);
t.tagPending.appendTagName(
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes {@code string} by feeding it to a new Tokeniser that uses
 * {@code ParseErrorList.noTracking()} as its error list.
 * @param string text to unescape
 * @param inAttribute forwarded to {@code Tokeniser.unescapeEntities}
 * @return whatever the tokeniser's {@code unescapeEntities} returns
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final Tokeniser unescaper = new Tokeniser(input, ParseErrorList.noTracking());
    return unescaper.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader into a string, replacing every character reference that
 * {@code consumeCharacterReference} resolves with the character it returns.
 * @param inAttribute forwarded to {@code consumeCharacterReference}
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text that precedes the next ampersand
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&'
        final Character resolved = consumeCharacterReference(null, inAttribute);
        // null means nothing was resolved: keep the ampersand literally
        out.append(resolved != null ? resolved.charValue() : '&');
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether {@code name} is a key of the {@code full} entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return {@code true} when {@code full} contains {@code name} as a key, {@code false} otherwise
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks a named entity up in the {@code full} entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'); the lookup result is returned as-is
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes {@code string} with the encoder and escape mode read from {@code out}, by delegating to
 * the three-argument {@code escape} overload.
 */
static String escape(String string, Document.OutputSettings out) {
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>Character.toLowerCase(r.current()));
t.dataBuffer.append(r.current());
t.advanceTransition(ScriptDataEscapedEndTagName);
} else {
t.emit("</");
t.transition(ScriptDataEscaped);
}
}
},
ScriptDataEscapedEndTagName {
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
t.tagPending.appendTagName(name.toLowerCase());
t.dataBuffer.append(name);
return;
}
if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(BeforeAttributeName);
break;
case '/':
t.transition(SelfClosingStartTag);
break;
case '>':
t.emitTagPending();
t.transition(Data);
break;
default:
t.dataBuffer.append(c);
anythingElse(t, r);
break;
}
} else {
anythingElse(t, r);
}
}
private void anythingElse(Tokeniser t, CharacterReader r) {
t.emit("</" + t.dataBuffer.toString());
t.transition(ScriptDataEscaped);
}
},
ScriptDataDoubleEscapeStart {
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
t.dataBuffer.append(name.toLowerCase());
t.emit(name);
return;
}
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
case '/':
case '>':
if (t.dataBuffer.toString().equals("script"))
t.transition(ScriptDataDoubleEscaped);
else
t.transition(ScriptDataEscaped);
t.emit(c);
break;
default:
r.unconsume();
t.transition(ScriptDataEscaped);
}
}
},
ScriptDataDoubleEscaped {
void read
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes {@code string} by feeding it to a new Tokeniser that uses
 * {@code ParseErrorList.noTracking()} as its error list.
 * @param string text to unescape
 * @param inAttribute forwarded to {@code Tokeniser.unescapeEntities}
 * @return whatever the tokeniser's {@code unescapeEntities} returns
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final Tokeniser unescaper = new Tokeniser(input, ParseErrorList.noTracking());
    return unescaper.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader into a string, replacing every character reference that
 * {@code consumeCharacterReference} resolves with the character it returns.
 * @param inAttribute forwarded to {@code consumeCharacterReference}
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text that precedes the next ampersand
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&'
        final Character resolved = consumeCharacterReference(null, inAttribute);
        // null means nothing was resolved: keep the ampersand literally
        out.append(resolved != null ? resolved.charValue() : '&');
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether {@code name} is a key of the {@code full} entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return {@code true} when {@code full} contains {@code name} as a key, {@code false} otherwise
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks a named entity up in the {@code full} entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'); the lookup result is returned as-is
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes {@code string} with the encoder and escape mode read from {@code out}, by delegating to
 * the three-argument {@code escape} overload.
 */
static String escape(String string, Document.OutputSettings out) {
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>(Tokeniser t, CharacterReader r) {
char c = r.current();
switch (c) {
case '-':
t.emit(c);
t.advanceTransition(ScriptDataDoubleEscapedDash);
break;
case '<':
t.emit(c);
t.advanceTransition(ScriptDataDoubleEscapedLessthanSign);
break;
case nullChar:
t.error(this);
r.advance();
t.emit(replacementChar);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
default:
String data = r.consumeToAny('-', '<', nullChar);
t.emit(data);
}
}
},
ScriptDataDoubleEscapedDash {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '-':
t.emit(c);
t.transition(ScriptDataDoubleEscapedDashDash);
break;
case '<':
t.emit(c);
t.transition(ScriptDataDoubleEscapedLessthanSign);
break;
case nullChar:
t.error(this);
t.emit(replacementChar);
t.transition(ScriptDataDoubleEscaped);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
default:
t.emit(c);
t.transition(ScriptDataDoubleEscaped);
}
}
},
ScriptDataDoubleEscapedDashDash {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '-':
t.emit(c);
break;
case '<':
t.emit(c);
t.transition(ScriptDataDoubleEscapedLessthanSign);
break;
case '>':
t.emit(c);
t.transition(ScriptData);
break;
case nullChar:
t.error(this);
t.emit(replacementChar);
t.transition(ScriptDataDoubleEscaped);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
default:
t.emit(c);
t.transition(Script
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes {@code string} by feeding it to a new Tokeniser that uses
 * {@code ParseErrorList.noTracking()} as its error list.
 * @param string text to unescape
 * @param inAttribute forwarded to {@code Tokeniser.unescapeEntities}
 * @return whatever the tokeniser's {@code unescapeEntities} returns
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final Tokeniser unescaper = new Tokeniser(input, ParseErrorList.noTracking());
    return unescaper.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader into a string, replacing every character reference that
 * {@code consumeCharacterReference} resolves with the character it returns.
 * @param inAttribute forwarded to {@code consumeCharacterReference}
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text that precedes the next ampersand
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&'
        final Character resolved = consumeCharacterReference(null, inAttribute);
        // null means nothing was resolved: keep the ampersand literally
        out.append(resolved != null ? resolved.charValue() : '&');
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether {@code name} is a key of the {@code full} entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return {@code true} when {@code full} contains {@code name} as a key, {@code false} otherwise
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks a named entity up in the {@code full} entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'); the lookup result is returned as-is
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes {@code string} with the encoder and escape mode read from {@code out}, by delegating to
 * the three-argument {@code escape} overload.
 */
static String escape(String string, Document.OutputSettings out) {
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>DataDoubleEscaped);
}
}
},
ScriptDataDoubleEscapedLessthanSign {
    /**
     * When the reader is positioned on '/', emits '/', calls createTempBuffer() and advances into
     * ScriptDataDoubleEscapeEnd. Otherwise switches back to ScriptDataDoubleEscaped without
     * consuming anything.
     */
    void read(Tokeniser t, CharacterReader r) {
        if (!r.matches('/')) {
            t.transition(ScriptDataDoubleEscaped);
            return;
        }
        t.emit('/');
        t.createTempBuffer();
        t.advanceTransition(ScriptDataDoubleEscapeEnd);
    }
},
ScriptDataDoubleEscapeEnd {
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
t.dataBuffer.append(name.toLowerCase());
t.emit(name);
return;
}
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
case '/':
case '>':
if (t.dataBuffer.toString().equals("script"))
t.transition(ScriptDataEscaped);
else
t.transition(ScriptDataDoubleEscaped);
t.emit(c);
break;
default:
r.unconsume();
t.transition(ScriptDataDoubleEscaped);
}
}
},
BeforeAttributeName {
// from tagname <xxx
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
break; // ignore whitespace
case '/':
t.transition(SelfClosingStartTag);
break;
case '>':
t.emitTagPending();
t.transition(Data);
break;
case nullChar:
t.error(this);
t.tagPending.newAttribute();
r.unconsume();
t.transition(AttributeName);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
case '"':
case '\'':
case '<':
case '=':
t.error(this);
t.tagPending.newAttribute();
t.tagPending.appendAttributeName(c);
t.transition(AttributeName);
break;
default: // A-Z, anything else
t.tagPending.newAttribute();
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes {@code string} by feeding it to a new Tokeniser that uses
 * {@code ParseErrorList.noTracking()} as its error list.
 * @param string text to unescape
 * @param inAttribute forwarded to {@code Tokeniser.unescapeEntities}
 * @return whatever the tokeniser's {@code unescapeEntities} returns
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final Tokeniser unescaper = new Tokeniser(input, ParseErrorList.noTracking());
    return unescaper.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader into a string, replacing every character reference that
 * {@code consumeCharacterReference} resolves with the character it returns.
 * @param inAttribute forwarded to {@code consumeCharacterReference}
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text that precedes the next ampersand
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&'
        final Character resolved = consumeCharacterReference(null, inAttribute);
        // null means nothing was resolved: keep the ampersand literally
        out.append(resolved != null ? resolved.charValue() : '&');
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether {@code name} is a key of the {@code full} entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return {@code true} when {@code full} contains {@code name} as a key, {@code false} otherwise
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks a named entity up in the {@code full} entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'); the lookup result is returned as-is
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes {@code string} with the encoder and escape mode read from {@code out}, by delegating to
 * the three-argument {@code escape} overload.
 */
static String escape(String string, Document.OutputSettings out) {
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> r.unconsume();
t.transition(AttributeName);
}
}
},
AttributeName {
    /**
     * Consumes text up to the next delimiter, appends its lower-cased form to the pending
     * attribute name, then consumes and dispatches on the delimiter itself: whitespace moves to
     * AfterAttributeName, '/' to SelfClosingStartTag, '=' to BeforeAttributeValue, '>' emits the
     * pending tag and returns to Data, eof is reported and returns to Data. A null character is
     * reported and recorded as the replacement char; a quote or '<' is reported and recorded as-is;
     * both of those leave the state unchanged.
     */
    void read(Tokeniser t, CharacterReader r) {
        final String rawName = r.consumeToAny('\t', '\n', '\r', '\f', ' ', '/', '=', '>', nullChar, '"', '\'', '<');
        t.tagPending.appendAttributeName(rawName.toLowerCase());

        final char ch = r.consume();
        switch (ch) {
            case '=':
                t.transition(BeforeAttributeValue);
                break;
            case '>':
                t.emitTagPending();
                t.transition(Data);
                break;
            case '/':
                t.transition(SelfClosingStartTag);
                break;
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
                t.transition(AfterAttributeName);
                break;
            case eof:
                t.eofError(this);
                t.transition(Data);
                break;
            case nullChar:
                t.error(this);
                t.tagPending.appendAttributeName(replacementChar);
                break;
            case '"':
            case '\'':
            case '<':
                t.error(this);
                t.tagPending.appendAttributeName(ch);
                break;
            // deliberately no default branch: other characters are taken by consumeToAny above
        }
    }
},
AfterAttributeName {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
// ignore
break;
case '/':
t.transition(SelfClosingStartTag);
break;
case '=':
t.transition(BeforeAttributeValue);
break;
case '>':
t.emitTagPending();
t.transition(Data);
break;
case nullChar:
t.error(this);
t.tagPending.appendAttributeName(replacementChar);
t.transition(AttributeName);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
case '"':
case '\'':
case '<':
t.error(this);
t.tagPending.newAttribute();
t.tagPending.appendAttributeName(c);
t.transition(AttributeName);
break;
default:
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes {@code string} by feeding it to a new Tokeniser that uses
 * {@code ParseErrorList.noTracking()} as its error list.
 * @param string text to unescape
 * @param inAttribute forwarded to {@code Tokeniser.unescapeEntities}
 * @return whatever the tokeniser's {@code unescapeEntities} returns
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final Tokeniser unescaper = new Tokeniser(input, ParseErrorList.noTracking());
    return unescaper.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader into a string, replacing every character reference that
 * {@code consumeCharacterReference} resolves with the character it returns.
 * @param inAttribute forwarded to {@code consumeCharacterReference}
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text that precedes the next ampersand
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&'
        final Character resolved = consumeCharacterReference(null, inAttribute);
        // null means nothing was resolved: keep the ampersand literally
        out.append(resolved != null ? resolved.charValue() : '&');
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether {@code name} is a key of the {@code full} entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return {@code true} when {@code full} contains {@code name} as a key, {@code false} otherwise
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks a named entity up in the {@code full} entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'); the lookup result is returned as-is
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes {@code string} with the encoder and escape mode read from {@code out}, by delegating to
 * the three-argument {@code escape} overload.
 */
static String escape(String string, Document.OutputSettings out) {
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> // A-Z, anything else
t.tagPending.newAttribute();
r.unconsume();
t.transition(AttributeName);
}
}
},
BeforeAttributeValue {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
// ignore
break;
case '"':
t.transition(AttributeValue_doubleQuoted);
break;
case '&':
r.unconsume();
t.transition(AttributeValue_unquoted);
break;
case '\'':
t.transition(AttributeValue_singleQuoted);
break;
case nullChar:
t.error(this);
t.tagPending.appendAttributeValue(replacementChar);
t.transition(AttributeValue_unquoted);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
case '>':
t.error(this);
t.emitTagPending();
t.transition(Data);
break;
case '<':
case '=':
case '`':
t.error(this);
t.tagPending.appendAttributeValue(c);
t.transition(AttributeValue_unquoted);
break;
default:
r.unconsume();
t.transition(AttributeValue_unquoted);
}
}
},
AttributeValue_doubleQuoted {
void read(Tokeniser t, CharacterReader r) {
String value = r.consumeToAny('"', '&', nullChar);
if (value.length() > 0)
t.tagPending.appendAttributeValue(value);
char c = r.consume();
switch (c) {
case '"':
t.transition(AfterAttributeValue_quoted);
break;
case '&':
Character ref = t.consumeCharacterReference('"', true);
if (ref != null)
t.tagPending.appendAttributeValue(ref);
else
t.tagPending.appendAttributeValue('&');
break;
case nullChar:
t.error(this);
t.tagPending.appendAttributeValue(replacementChar);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
// no default, handled in consume to any above
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes {@code string} by feeding it to a new Tokeniser that uses
 * {@code ParseErrorList.noTracking()} as its error list.
 * @param string text to unescape
 * @param inAttribute forwarded to {@code Tokeniser.unescapeEntities}
 * @return whatever the tokeniser's {@code unescapeEntities} returns
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final Tokeniser unescaper = new Tokeniser(input, ParseErrorList.noTracking());
    return unescaper.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader into a string, replacing every character reference that
 * {@code consumeCharacterReference} resolves with the character it returns.
 * @param inAttribute forwarded to {@code consumeCharacterReference}
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text that precedes the next ampersand
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&'
        final Character resolved = consumeCharacterReference(null, inAttribute);
        // null means nothing was resolved: keep the ampersand literally
        out.append(resolved != null ? resolved.charValue() : '&');
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether {@code name} is a key of the {@code full} entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return {@code true} when {@code full} contains {@code name} as a key, {@code false} otherwise
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks a named entity up in the {@code full} entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'); the lookup result is returned as-is
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes {@code string} with the encoder and escape mode read from {@code out}, by delegating to
 * the three-argument {@code escape} overload.
 */
static String escape(String string, Document.OutputSettings out) {
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
}
},
AttributeValue_singleQuoted {
    /**
     * Appends the text that precedes the next '\'', '&' or null character to the pending attribute
     * value (when that text is non-empty), then consumes one character: '\'' moves to
     * AfterAttributeValue_quoted; '&' attempts a character reference and appends either the
     * resolved character or a literal '&'; a null character is reported and appended as the
     * replacement char; eof is reported and returns to Data.
     */
    void read(Tokeniser t, CharacterReader r) {
        final String chunk = r.consumeToAny('\'', '&', nullChar);
        if (chunk.length() > 0) {
            t.tagPending.appendAttributeValue(chunk);
        }

        final char ch = r.consume();
        switch (ch) {
            case '&':
                final Character resolved = t.consumeCharacterReference('\'', true);
                if (resolved == null)
                    t.tagPending.appendAttributeValue('&');
                else
                    t.tagPending.appendAttributeValue(resolved);
                break;
            case '\'':
                t.transition(AfterAttributeValue_quoted);
                break;
            case eof:
                t.eofError(this);
                t.transition(Data);
                break;
            case nullChar:
                t.error(this);
                t.tagPending.appendAttributeValue(replacementChar);
                break;
            // deliberately no default branch: other characters are taken by consumeToAny above
        }
    }
},
AttributeValue_unquoted {
void read(Tokeniser t, CharacterReader r) {
String value = r.consumeToAny('\t', '\n', '\r', '\f', ' ', '&', '>', nullChar, '"', '\'', '<', '=', '`');
if (value.length() > 0)
t.tagPending.appendAttributeValue(value);
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(BeforeAttributeName);
break;
case '&':
Character ref = t.consumeCharacterReference('>', true);
if (ref != null)
t.tagPending.appendAttributeValue(ref);
else
t.tagPending.appendAttributeValue('&');
break;
case '>':
t.emitTagPending();
t.transition(Data);
break;
case nullChar:
t.error(this);
t.tagPending.appendAttributeValue(replacementChar);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
case '"':
case '\'':
case '<':
case '=':
case '`':
t.error(this);
t.tagPending.appendAttributeValue(c);
break;
// no default, handled in consume to any above
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes {@code string} by feeding it to a new Tokeniser that uses
 * {@code ParseErrorList.noTracking()} as its error list.
 * @param string text to unescape
 * @param inAttribute forwarded to {@code Tokeniser.unescapeEntities}
 * @return whatever the tokeniser's {@code unescapeEntities} returns
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final Tokeniser unescaper = new Tokeniser(input, ParseErrorList.noTracking());
    return unescaper.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader into a string, replacing every character reference that
 * {@code consumeCharacterReference} resolves with the character it returns.
 * @param inAttribute forwarded to {@code consumeCharacterReference}
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text that precedes the next ampersand
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&'
        final Character resolved = consumeCharacterReference(null, inAttribute);
        // null means nothing was resolved: keep the ampersand literally
        out.append(resolved != null ? resolved.charValue() : '&');
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether {@code name} is a key of the {@code full} entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return {@code true} when {@code full} contains {@code name} as a key, {@code false} otherwise
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks a named entity up in the {@code full} entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'); the lookup result is returned as-is
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes {@code string} with the encoder and escape mode read from {@code out}, by delegating to
 * the three-argument {@code escape} overload.
 */
static String escape(String string, Document.OutputSettings out) {
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
}
}
},
// CharacterReferenceInAttributeValue state handled inline
AfterAttributeValue_quoted {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(BeforeAttributeName);
break;
case '/':
t.transition(SelfClosingStartTag);
break;
case '>':
t.emitTagPending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
default:
t.error(this);
r.unconsume();
t.transition(BeforeAttributeName);
}
}
},
SelfClosingStartTag {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '>':
t.tagPending.selfClosing = true;
t.emitTagPending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.transition(Data);
break;
default:
t.error(this);
t.transition(BeforeAttributeName);
}
}
},
BogusComment {
    /**
     * Steps the reader back one character, collects everything up to the next '>' into a new
     * comment token, emits it, and advances into Data.
     */
    void read(Tokeniser t, CharacterReader r) {
        // todo: handle bogus comment starting from eof. when does that trigger?
        // step back so the character that led into this state is part of the comment
        r.unconsume();
        final Token.Comment bogus = new Token.Comment();
        final String body = r.consumeTo('>');
        bogus.data.append(body);
        // todo: replace nullChar with replaceChar
        t.emit(bogus);
        t.advanceTransition(Data);
    }
},
MarkupDeclarationOpen {
void read(Tokeniser t, CharacterReader r) {
if (r.matchConsume("--")) {
t.createCommentPending();
t.transition(CommentStart);
} else if (r.matchConsumeIgnoreCase("DOCTYPE")) {
t.transition(Doctype);
} else if (r.matchConsume("[CDATA[")) {
// todo: should actually check current namespace, and only non-html allows cdata. until namespace
// is implemented properly, keep handling as cdata
//} else
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) {
t.transition(CdataSection);
} else {
t.error(this);
t.advanceTransition(BogusComment); // advance so this character gets in bogus comment data's rewind
}
}
},
CommentStart {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '-':
t.transition(CommentStartDash);
break;
case nullChar:
t.error(this);
t.commentPending.data.append(replacementChar);
t.transition(Comment);
break;
case '>':
t.error(this);
t.emitCommentPending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.emitCommentPending();
t.transition(Data);
break;
default:
t.commentPending.data.append(c);
t.transition(Comment);
}
}
},
// Handles the character after a single '-' at the very start of a comment
// body (i.e. after "<!--" followed by '-').
CommentStartDash {
    void read(Tokeniser t, CharacterReader r) {
        char c = r.consume();
        switch (c) {
            case '-':
                // A second dash means we have seen "--" and may be at the end
                // of the comment; that is CommentEnd's job. Looping back into
                // this state would treat "<!---->" as an error and lose the
                // dashes of inputs such as "<!----x-->".
                t.transition(CommentEnd);
                break;
            case nullChar:
                t.error(this);
                // keep the dash that led into this state, as CommentEndDash does
                t.commentPending.data.append('-').append(replacementChar);
                t.transition(Comment);
                break;
            case '>':
                t.error(this);
                t.emitCommentPending();
                t.transition(Data);
                break;
            case eof:
                t.eofError(this);
                t.emitCommentPending();
                t.transition(Data);
                break;
            default:
                // the dash already consumed was comment data after all, so
                // record it together with the current character
                t.commentPending.data.append('-').append(c);
                t.transition(Comment);
        }
    }
},
Comment {
void read(Tokeniser t, CharacterReader r) {
char c = r.current();
switch (c) {
case '-':
t.advanceTransition(CommentEndDash);
break;
case nullChar:
t.error(this);
r.advance();
t.commentPending.data.append(replacementChar);
break;
case eof:
t.eofError(this);
t.emitCommentPending();
t.transition
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>(Data);
break;
default:
t.commentPending.data.append(r.consumeToAny('-', nullChar));
}
}
},
CommentEndDash {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '-':
t.transition(CommentEnd);
break;
case nullChar:
t.error(this);
t.commentPending.data.append('-').append(replacementChar);
t.transition(Comment);
break;
case eof:
t.eofError(this);
t.emitCommentPending();
t.transition(Data);
break;
default:
t.commentPending.data.append('-').append(c);
t.transition(Comment);
}
}
},
CommentEnd {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '>':
t.emitCommentPending();
t.transition(Data);
break;
case nullChar:
t.error(this);
t.commentPending.data.append("--").append(replacementChar);
t.transition(Comment);
break;
case '!':
t.error(this);
t.transition(CommentEndBang);
break;
case '-':
t.error(this);
t.commentPending.data.append('-');
break;
case eof:
t.eofError(this);
t.emitCommentPending();
t.transition(Data);
break;
default:
t.error(this);
t.commentPending.data.append("--").append(c);
t.transition(Comment);
}
}
},
CommentEndBang {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '-':
t.commentPending.data.append("--!");
t.transition(CommentEndDash);
break;
case '>':
t.emitCommentPending();
t.transition(Data);
break;
case nullChar:
t.error(this);
t.commentPending.data.append("--!").append(replacementChar);
t.transition(Comment);
break;
case eof:
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> t.eofError(this);
t.emitCommentPending();
t.transition(Data);
break;
default:
t.commentPending.data.append("--!").append(c);
t.transition(Comment);
}
}
},
// Reads the character straight after the DOCTYPE keyword. Whitespace is the
// expected separator before the name; anything else is an error but must not
// cost us input.
Doctype {
    void read(Tokeniser t, CharacterReader r) {
        char c = r.consume();
        switch (c) {
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
                t.transition(BeforeDoctypeName);
                break;
            case eof:
                t.eofError(this);
                t.createDoctypePending();
                t.doctypePending.forceQuirks = true;
                t.emitDoctypePending();
                t.transition(Data);
                break;
            case '>':
                // "<!DOCTYPE>": a doctype with no name. Emit it in force-quirks
                // mode instead of swallowing the '>' and reading the following
                // markup as the doctype name.
                t.error(this);
                t.createDoctypePending();
                t.doctypePending.forceQuirks = true;
                t.emitDoctypePending();
                t.transition(Data);
                break;
            default:
                t.error(this);
                // Missing whitespace (e.g. "<!DOCTYPEhtml>"): the character is
                // the start of the name, so hand it back for
                // BeforeDoctypeName to read.
                r.unconsume();
                t.transition(BeforeDoctypeName);
        }
    }
},
// Skips whitespace before the doctype name and creates the pending doctype
// token as soon as the first name character (or EOF) is seen.
BeforeDoctypeName {
    void read(Tokeniser t, CharacterReader r) {
        if (r.matchesLetter()) {
            // leave the letters unread; DoctypeName consumes the whole run
            t.createDoctypePending();
            t.transition(DoctypeName);
            return;
        }
        char c = r.consume();
        switch (c) {
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
                break; // ignore whitespace
            case nullChar:
                t.error(this);
                // The pending doctype must exist before its name is written;
                // every other branch that touches it creates it first.
                t.createDoctypePending();
                t.doctypePending.name.append(replacementChar);
                t.transition(DoctypeName);
                break;
            case eof:
                t.eofError(this);
                t.createDoctypePending();
                t.doctypePending.forceQuirks = true;
                t.emitDoctypePending();
                t.transition(Data);
                break;
            default:
                t.createDoctypePending();
                t.doctypePending.name.append(c);
                t.transition(DoctypeName);
        }
    }
},
DoctypeName {
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
t.doctypePending.name.append(name.toLowerCase());
return;
}
char c = r.consume();
switch (c) {
case '>':
t.emitDoctypePending();
t.transition(Data
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>);
break;
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(AfterDoctypeName);
break;
case nullChar:
t.error(this);
t.doctypePending.name.append(replacementChar);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.doctypePending.name.append(c);
}
}
},
// After the doctype name: skips whitespace, closes the doctype on '>', or
// moves on to the PUBLIC / SYSTEM keyword handling.
AfterDoctypeName {
    void read(Tokeniser t, CharacterReader r) {
        if (r.isEmpty()) {
            // input ran out inside the doctype
            t.eofError(this);
            t.doctypePending.forceQuirks = true;
            t.emitDoctypePending();
            t.transition(Data);
            return;
        }
        if (r.matchesAny('\t', '\n', '\r', '\f', ' ')) {
            r.advance(); // ignore whitespace
            return;
        }
        if (r.matches('>')) {
            t.emitDoctypePending();
            t.advanceTransition(Data);
            return;
        }
        if (r.matchConsumeIgnoreCase("PUBLIC")) {
            t.transition(AfterDoctypePublicKeyword);
            return;
        }
        if (r.matchConsumeIgnoreCase("SYSTEM")) {
            t.transition(AfterDoctypeSystemKeyword);
            return;
        }
        // anything else makes this a bogus doctype
        t.error(this);
        t.doctypePending.forceQuirks = true;
        t.advanceTransition(BogusDoctype);
    }
},
AfterDoctypePublicKeyword {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(BeforeDoctypePublicIdentifier);
break;
case '"':
t.error(this);
// set public id to empty string
t.transition(DoctypePublicIdentifier_doubleQuoted);
break;
case '\'':
t.error(this);
// set public id to empty string
t.transition(DoctypePublicIdentifier_singleQuoted);
break;
case '>':
t.error(this);
t.doctypePending.force
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>Quirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.error(this);
t.doctypePending.forceQuirks = true;
t.transition(BogusDoctype);
}
}
},
BeforeDoctypePublicIdentifier {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
break;
case '"':
// set public id to empty string
t.transition(DoctypePublicIdentifier_doubleQuoted);
break;
case '\'':
// set public id to empty string
t.transition(DoctypePublicIdentifier_singleQuoted);
break;
case '>':
t.error(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.error(this);
t.doctypePending.forceQuirks = true;
t.transition(BogusDoctype);
}
}
},
DoctypePublicIdentifier_doubleQuoted {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '"':
t.transition(AfterDoctypePublicIdentifier);
break;
case nullChar:
t.error(this);
t.doctypePending.publicIdentifier.append(replacementChar);
break;
case '>':
t.error(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>.transition(Data);
break;
default:
t.doctypePending.publicIdentifier.append(c);
}
}
},
DoctypePublicIdentifier_singleQuoted {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\'':
t.transition(AfterDoctypePublicIdentifier);
break;
case nullChar:
t.error(this);
t.doctypePending.publicIdentifier.append(replacementChar);
break;
case '>':
t.error(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.doctypePending.publicIdentifier.append(c);
}
}
},
AfterDoctypePublicIdentifier {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
t.transition(BetweenDoctypePublicAndSystemIdentifiers);
break;
case '>':
t.emitDoctypePending();
t.transition(Data);
break;
case '"':
t.error(this);
// system id empty
t.transition(DoctypeSystemIdentifier_doubleQuoted);
break;
case '\'':
t.error(this);
// system id empty
t.transition(DoctypeSystemIdentifier_singleQuoted);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.error(this);
t.doctypePending.forceQuirks = true;
t.transition(BogusDoctype);
}
}
},
BetweenDoctypePublicAndSystemIdentifiers {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>':
case '\r':
case '\f':
case ' ':
break;
case '>':
t.emitDoctypePending();
t.transition(Data);
break;
case '"':
t.error(this);
// system id empty
t.transition(DoctypeSystemIdentifier_doubleQuoted);
break;
case '\'':
t.error(this);
// system id empty
t.transition(DoctypeSystemIdentifier_singleQuoted);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.error(this);
t.doctypePending.forceQuirks = true;
t.transition(BogusDoctype);
}
}
},
// Reads the character after the SYSTEM keyword of a doctype: whitespace is
// expected, a quote starts the system identifier directly (with an error),
// and '>' or EOF end the doctype in force-quirks mode.
AfterDoctypeSystemKeyword {
    void read(Tokeniser t, CharacterReader r) {
        char c = r.consume();
        switch (c) {
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
                t.transition(BeforeDoctypeSystemIdentifier);
                break;
            case '>':
                t.error(this);
                t.doctypePending.forceQuirks = true;
                t.emitDoctypePending();
                t.transition(Data);
                break;
            case '"':
                t.error(this);
                // system id empty
                t.transition(DoctypeSystemIdentifier_doubleQuoted);
                break;
            case '\'':
                t.error(this);
                // system id empty
                t.transition(DoctypeSystemIdentifier_singleQuoted);
                break;
            case eof:
                t.eofError(this);
                t.doctypePending.forceQuirks = true;
                t.emitDoctypePending();
                t.transition(Data);
                break;
            default:
                t.error(this);
                t.doctypePending.forceQuirks = true;
                // Hand the rest of the malformed doctype to BogusDoctype, as
                // the PUBLIC-keyword state does. Emitting here without leaving
                // the state would emit the same doctype again for every
                // further unexpected character.
                t.transition(BogusDoctype);
        }
    }
},
BeforeDoctypeSystemIdentifier {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
break;
case '"
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>':
// set system id to empty string
t.transition(DoctypeSystemIdentifier_doubleQuoted);
break;
case '\'':
// set public id to empty string
t.transition(DoctypeSystemIdentifier_singleQuoted);
break;
case '>':
t.error(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.error(this);
t.doctypePending.forceQuirks = true;
t.transition(BogusDoctype);
}
}
},
DoctypeSystemIdentifier_doubleQuoted {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '"':
t.transition(AfterDoctypeSystemIdentifier);
break;
case nullChar:
t.error(this);
t.doctypePending.systemIdentifier.append(replacementChar);
break;
case '>':
t.error(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.doctypePending.systemIdentifier.append(c);
}
}
},
DoctypeSystemIdentifier_singleQuoted {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\'':
t.transition(AfterDoctypeSystemIdentifier);
break;
case nullChar:
t.error(this);
t.doctypePending.systemIdentifier.append(replacementChar);
break;
case '>':
t.error(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctype
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>Pending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.doctypePending.systemIdentifier.append(c);
}
}
},
AfterDoctypeSystemIdentifier {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '\t':
case '\n':
case '\r':
case '\f':
case ' ':
break;
case '>':
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.eofError(this);
t.doctypePending.forceQuirks = true;
t.emitDoctypePending();
t.transition(Data);
break;
default:
t.error(this);
t.transition(BogusDoctype);
// NOT force quirks
}
}
},
BogusDoctype {
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
case '>':
t.emitDoctypePending();
t.transition(Data);
break;
case eof:
t.emitDoctypePending();
t.transition(Data);
break;
default:
// ignore char
break;
}
}
},
CdataSection {
    /**
     * Emits everything up to the CDATA terminator {@code ]]>} as character data, consumes the terminator if it
     * is present, and returns to {@code Data}.
     */
    void read(Tokeniser t, CharacterReader r) {
        t.emit(r.consumeTo("]]>"));
        r.matchConsume("]]>"); // skip the terminator itself
        t.transition(Data);
    }
};
/**
 * Processes input for this tokeniser state. Each state constant supplies its own implementation, which reads
 * from {@code r} and reports results (emits, errors, transitions) through {@code t}.
 * @param t the tokeniser being driven
 * @param r the character reader supplying input
 */
abstract void read(Tokeniser t, CharacterReader r);
// the NUL character (U+0000), matched explicitly by states that replace it
private static final char nullChar = '\u0000';
// substitute character, shared with Tokeniser
private static final char replacementChar = Tokeniser.replacementChar;
// String form of the replacement character
private static final String replacementStr = String.valueOf(Tokeniser.replacementChar);
// end-of-input sentinel, as defined by CharacterReader
private static final char eof = CharacterReader.EOF;
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.helper.DescendableLinkedList;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
/**
* HTML Tree Builder; creates a DOM from Tokens.
*/
class HtmlTreeBuilder extends TreeBuilder {
private HtmlTreeBuilderState state; // the current state
private HtmlTreeBuilderState originalState; // original / marked state
private boolean baseUriSetFromDoc = false;
private Element headElement; // the current head element
private Element formElement; // the current form element
private Element contextElement; // fragment parse context -- could be null even if fragment parsing
private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active (open) formatting elements
private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars in table to be shifted out
private boolean framesetOk = true; // if ok to go into frameset
private boolean fosterInserts = false; // if next inserts should be fostered
private boolean fragmentParsing = false; // if parsing a fragment of html
HtmlTreeBuilder() {}
/**
 * Parses the input after resetting the tree-builder state to {@code HtmlTreeBuilderState.Initial}; the actual
 * parse is delegated to the superclass.
 */
@Override
Document parse(String input, String baseUri, ParseErrorList errors) {
state = HtmlTreeBuilderState.Initial;
return super.parse(input, baseUri, errors);
}
List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) {
// context may be null
state = HtmlTreeBuilderState.Initial;
initialiseParse(inputFragment, baseUri, errors);
contextElement = context;
fragmentParsing = true;
Element root = null;
if (context != null) {
if (context.ownerDocument() != null) // quirks setup:
doc.quirksMode(context.ownerDocument().quirksMode());
// initialise the tokeniser state:
String contextTag = context.tagName();
if (StringUtil.in(contextTag, "title", "textarea"))
tokeniser.transition(TokeniserState.Rcdata);
else if (StringUtil.in(
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
tokeniser.transition(TokeniserState.Rawtext);
else if (contextTag.equals("script"))
tokeniser.transition(TokeniserState.ScriptData);
else if (contextTag.equals(("noscript")))
tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext
else if (contextTag.equals("plaintext"))
tokeniser.transition(TokeniserState.Data);
else
tokeniser.transition(TokeniserState.Data); // default
root = new Element(Tag.valueOf("html"), baseUri);
doc.appendChild(root);
stack.push(root);
resetInsertionMode();
// todo: setup form element to nearest form on context (up ancestor chain)
}
runParser();
if (context != null)
return root.childNodes();
else
return doc.childNodes();
}
/**
 * Records the token as the current token and hands it to the current tree-builder state.
 * @param token token to process
 * @return the result reported by the state's {@code process} call
 */
@Override
protected boolean process(Token token) {
currentToken = token;
return this.state.process(token, this);
}
/**
 * Records the token as the current token and processes it with the supplied state rather than the builder's
 * current state. The builder's own state field is not changed by this method.
 * @param token token to process
 * @param state state that should handle the token
 * @return the result reported by the state's {@code process} call
 */
boolean process(Token token, HtmlTreeBuilderState state) {
currentToken = token;
return state.process(token, this);
}
/**
 * Switches the tree builder to the given state.
 * @param state the new current state
 */
void transition(HtmlTreeBuilderState state) {
this.state = state;
}
/**
 * @return the current tree-builder state
 */
HtmlTreeBuilderState state() {
return state;
}
/**
 * Remembers the current state as the original (marked) state, retrievable via {@code originalState()}.
 */
void markInsertionMode() {
originalState = state;
}
/**
 * @return the state last saved by {@code markInsertionMode()}
 */
HtmlTreeBuilderState originalState() {
return originalState;
}
/**
 * Sets the frameset-ok flag.
 * @param framesetOk the new flag value
 */
void framesetOk(boolean framesetOk) {
this.framesetOk = framesetOk;
}
/**
 * @return the current frameset-ok flag
 */
boolean framesetOk() {
return framesetOk;
}
/**
 * @return the document being built
 */
Document getDocument() {
return doc;
}
/**
 * @return the base URI currently held by the builder
 */
String getBaseUri() {
return baseUri;
}
void maybeSetBaseUri(Element base) {
if (baseUriSetFromDoc) // only listen to the first <base href> in parse
return;
String href = base.absUrl("href");
if (href.length() != 0) { // ignore <base target> etc
baseUri = href;
baseUriSetFromDoc = true;
doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base,
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> and to update all descendants
}
}
/**
 * @return the value of the fragment-parsing flag
 */
boolean isFragmentParsing() {
return fragmentParsing;
}
/**
 * Records an "unexpected token" parse error for the current token, provided the error list still accepts
 * entries.
 * @param state the state in which the unexpected token was seen
 */
void error(HtmlTreeBuilderState state) {
    if (!errors.canAddError())
        return;
    ParseError problem = new ParseError(reader.pos(), "Unexpected token [%s] when in state [%s]", currentToken.tokenType(), state);
    errors.add(problem);
}
/**
 * Creates an element for the start tag and inserts it.
 * <p>
 * A self-closing tag with an unknown name is inserted as an empty element and immediately followed by a
 * synthetic end tag, so the builder leaves whatever state the start tag put it in. (When the spec expects an
 * empty tag, callers hit {@code insertEmpty} directly, so no fake end tag is generated there.)
 * @param startTag the start tag token
 * @return the inserted element
 */
Element insert(Token.StartTag startTag) {
    final boolean unknownSelfClosing = startTag.isSelfClosing() && !Tag.isKnownTag(startTag.name());
    if (!unknownSelfClosing) {
        Tag tag = Tag.valueOf(startTag.name());
        Element created = new Element(tag, baseUri, startTag.attributes);
        insert(created);
        return created;
    }

    // handle empty unknown tags
    Element empty = insertEmpty(startTag);
    process(new Token.EndTag(empty.tagName())); // ensure we get out of whatever state we are in
    return empty;
}
/**
 * Creates an attribute-less element with the given tag name and inserts it.
 * @param startTagName tag name of the element to create
 * @return the inserted element
 */
Element insert(String startTagName) {
    Tag tag = Tag.valueOf(startTagName);
    Element created = new Element(tag, baseUri);
    insert(created);
    return created;
}
/**
 * Inserts the element via {@code insertNode} and then adds it to the stack.
 * @param el element to insert
 */
void insert(Element el) {
insertNode(el);
stack.add(el);
}
/**
 * Creates an element for the start tag and inserts it as a node, without adding it to the stack.
 * If the token is self-closing, the tokeniser's self-closing flag is acknowledged, and an unknown tag is
 * marked as self-closing so that it is remembered for output.
 * @param startTag the start tag token
 * @return the inserted element
 */
Element insertEmpty(Token.StartTag startTag) {
    final Tag tag = Tag.valueOf(startTag.name());
    final Element created = new Element(tag, baseUri, startTag.attributes);
    insertNode(created);
    if (!startTag.isSelfClosing())
        return created;

    tokeniser.acknowledgeSelfClosingFlag();
    if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output
        tag.setSelfClosing();
    return created;
}
/**
 * Wraps the comment token's data in a Comment node and inserts it.
 * @param commentToken the comment token
 */
void insert(Token.Comment commentToken) {
    insertNode(new Comment(commentToken.getData(), baseUri));
}
void insert(Token.Character characterToken) {
Node node;
// characters in script and style go in as datanodes, not text nodes
if (StringUtil.in(currentElement().tagName(), "script", "style"))
node = new DataNode(characterToken.getData(), baseUri);
else
node = new TextNode(characterToken.getData(), baseUri);
currentElement().appendChild(node);
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> }
entry = formattingElements.get(--pos); // step 5. one earlier than entry
if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
break; // jump to 8, else continue back to 4
}
while(true) {
if (!skip) // step 7: on later than entry
entry = formattingElements.get(++pos);
Validate.notNull(entry); // should not occur, as we break at last element
// 8. create new element from element, 9 insert into current node, onto stack
skip = false; // can only skip increment from 4.
Element newEl = insert(entry.nodeName()); // todo: avoid fostering here?
// newEl.namespace(entry.namespace()); // todo: namespaces
newEl.attributes().addAll(entry.attributes());
// 10. replace entry with new entry
formattingElements.add(pos, newEl);
formattingElements.remove(pos + 1);
// 11
if (pos == size-1) // if not last entry in list, jump to 7
break;
}
}
/**
 * Removes entries from the end of the active formatting elements list, stopping once a marker (a
 * {@code null} entry) has been removed or the list is empty.
 */
void clearFormattingElementsToLastMarker() {
    while (!formattingElements.isEmpty()) {
        Element removed = formattingElements.removeLast();
        if (removed == null) // the marker itself is removed too
            return;
    }
}
/**
 * Removes the given element from the active formatting elements list, searching from the end; only the
 * first identity match (closest to the end) is removed.
 * @param el element to remove
 */
void removeFromActiveFormattingElements(Element el) {
    for (Iterator<Element> cursor = formattingElements.descendingIterator(); cursor.hasNext(); ) {
        if (cursor.next() == el) {
            cursor.remove();
            return;
        }
    }
}
/**
 * Checks the active formatting elements list for the given element, delegating to {@code isElementInQueue}.
 * @param el element to look for
 * @return the result of {@code isElementInQueue} for the formatting elements list
 */
boolean isInActiveFormattingElements(Element el) {
return isElementInQueue(formattingElements, el);
}
/**
 * Searches the active formatting elements from the end for an element with the given node name, giving up
 * at the first scope marker ({@code null} entry).
 * @param nodeName node name to match
 * @return the matching element, or {@code null} if a marker or the start of the list is reached first
 */
Element getActiveFormattingElement(String nodeName) {
    for (Iterator<Element> cursor = formattingElements.descendingIterator(); cursor.hasNext(); ) {
        Element candidate = cursor.next();
        if (candidate == null) // scope marker
            return null;
        if (candidate.nodeName().equals(nodeName))
            return candidate;
    }
    return null;
}
/**
 * Delegates to {@code replaceInQueue} on the active formatting elements list.
 * NOTE(review): presumably swaps {@code out} for {@code in} at the same position -- confirm against
 * {@code replaceInQueue}.
 * @param out element to be replaced
 * @param in replacement element
 */
void replaceActiveFormattingElement(Element out, Element in) {
replaceInQueue(formattingElements, out, in);
}
void insertMarkerToFormattingElements() {
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> return attributes != null && attributes.containsKey(key.toLowerCase());
}
/**
 Get the number of attributes in this set.
 @return the attribute count; 0 when no attribute map has been created yet
 */
public int size() {
    return attributes == null ? 0 : attributes.size();
}
/**
 Add all the attributes from the incoming set to this set. An empty incoming set is a no-op; otherwise the
 backing map is created on demand and entries with the same key are overwritten by the incoming ones.
 @param incoming attributes to add to these attributes.
 */
public void addAll(Attributes incoming) {
    final int incomingCount = incoming.size();
    if (incomingCount == 0)
        return;

    if (attributes == null)
        attributes = new LinkedHashMap<String, Attribute>(incomingCount);
    attributes.putAll(incoming.attributes);
}
/**
 Get an iterator over the attributes, backed by the list returned from {@code asList()}.
 @return iterator over the attributes
 */
public Iterator<Attribute> iterator() {
return asList().iterator();
}
/**
 Get the attributes as a List, for iteration. Do not modify the keys of the attributes via this view, as changes
 to keys will not be recognised in the containing set.
 @return an unmodifiable view of the attributes as a List; empty if there are no attributes.
 */
public List<Attribute> asList() {
    if (attributes == null)
        return Collections.emptyList();

    List<Attribute> copy = new ArrayList<Attribute>(attributes.size());
    for (Attribute attribute : attributes.values())
        copy.add(attribute);
    return Collections.unmodifiableList(copy);
}
/**
 * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
 * starting with {@code data-}. A new {@code Dataset} instance is created on every call.
 * @return map of custom data attributes.
 */
public Map<String, String> dataset() {
return new Dataset();
}
/**
 Get the HTML representation of these attributes, rendered with the output settings of a freshly created
 empty Document.
 @return HTML
 */
public String html() {
    // output settings a bit funky, but this html() seldom used
    Document.OutputSettings settings = (new Document("")).outputSettings();
    StringBuilder rendered = new StringBuilder();
    html(rendered, settings);
    return rendered.toString();
}
/**
 Append each attribute's HTML to the accumulator, each preceded by a single space. Does nothing when there
 are no attributes.
 @param accum builder to append to
 @param out output settings passed through to each attribute
 */
void html(StringBuilder accum, Document.OutputSettings out) {
    if (attributes == null)
        return;

    for (Attribute attribute : attributes.values()) {
        accum.append(" ");
        attribute.html(accum, out);
    }
}
/**
 Same as {@code html()}.
 @return HTML representation of these attributes
 */
public String toString() {
return html();
}
@Override
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> }
}
// merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html>
private void normaliseStructure(String tag, Element htmlEl) {
    final Elements matches = this.getElementsByTag(tag);
    final Element keeper = matches.first(); // will always be available as created above if not existent
    if (matches.size() > 1) {
        // duplicates exist: gather their children first, detaching each duplicate as we go
        final List<Node> orphans = new ArrayList<Node>();
        for (int idx = 1; idx < matches.size(); idx++) {
            final Node extra = matches.get(idx);
            orphans.addAll(extra.childNodes);
            extra.remove();
        }
        // then re-home the gathered children under the first match
        for (Node orphan : orphans) {
            keeper.appendChild(orphan);
        }
    }
    // ensure parented by <html>
    if (!keeper.parent().equals(htmlEl)) {
        htmlEl.appendChild(keeper); // includes remove()
    }
}
// fast method to get first by tag name, used for html, head, body finders
private Element findFirstElementByTagName(String tag, Node node) {
    // the node itself is the answer when its name matches the requested tag
    if (node.nodeName().equals(tag)) {
        return (Element) node;
    }
    // otherwise search each child subtree in order and return the first hit
    for (Node child : node.childNodes) {
        final Element match = findFirstElementByTagName(tag, child);
        if (match != null) {
            return match;
        }
    }
    // nothing in this subtree carries the tag name
    return null;
}
/**
 * Outer HTML of the document. This is simply the result of {@code super.html()}, so no
 * wrapper tag is written around the content.
 * @return the value of {@code super.html()}
 */
@Override
public String outerHtml() {
return super.html(); // no outer wrapper tag
}
/**
Set the text of the {@code body} of this document. Any existing nodes within the body will be cleared.
@param text unencoded text
@return this document
*/
@Override
public Element text(String text) {
// set the text on body() rather than on the document itself, so the document structure is not wiped out
body().text(text);
// return the document (not the body) for chaining
return this;
}
/**
 * Node name of a document.
 * @return the fixed string {@code "#document"}
 */
@Override
public String nodeName() {
return "#document";
}
/**
 * Clones this document via {@code super.clone()} and gives the copy its own clone of the
 * output settings, so the two documents do not share one settings object.
 * @return the cloned document
 */
@Override
public Document clone() {
    final Document copy = (Document) super.clone();
    copy.outputSettings = this.outputSettings.clone();
    return copy;
}
/**
* A Document's output settings control the form of the text() and html() methods.
*/
public static class OutputSettings implements
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.nodes;
/**
A comment node.
@author Jonathan Hedley, jonathan@hedley.net */
public class Comment extends Node {
    /** Attribute key under which the comment text is stored. */
    private static final String COMMENT_KEY = "comment";

    /**
     * Builds a comment node holding the given text.
     * @param data the contents of the comment
     * @param baseUri base URI, passed on to the Node constructor
     */
    public Comment(String data, String baseUri) {
        super(baseUri);
        attributes.put(COMMENT_KEY, data);
    }

    /**
     * Node name of a comment.
     * @return the fixed string {@code "#comment"}
     */
    public String nodeName() {
        return "#comment";
    }

    /**
     * Reads the comment text back out of this node's attributes.
     * @return the value stored under the comment key
     */
    public String getData() {
        final String data = attributes.get(COMMENT_KEY);
        return data;
    }

    /**
     * Writes the comment as {@code <!--data-->}, indenting first when pretty-printing is enabled.
     */
    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
        if (out.prettyPrint()) {
            indent(accum, depth, out);
        }
        accum.append("<!--");
        accum.append(getData());
        accum.append("-->");
    }

    /** Intentionally empty: everything is written by outerHtmlHead. */
    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}

    /**
     * String form of the comment.
     * @return the result of {@code outerHtml()}
     */
    public String toString() {
        return outerHtml();
    }
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.nodes;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.parser.Parser;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.
@author Jonathan Hedley, jonathan@hedley.net */
public abstract class Node implements Cloneable {
Node parentNode;
List<Node> childNodes;
Attributes attributes;
String baseUri;
int siblingIndex;
/**
Create a new Node.
@param baseUri base URI
@param attributes attributes (not null, but may be empty)
*/
protected Node(String baseUri, Attributes attributes) {
    // reject nulls up front: base URI first, then attributes
    Validate.notNull(baseUri);
    Validate.notNull(attributes);
    this.attributes = attributes;
    // the base URI is stored trimmed
    this.baseUri = baseUri.trim();
    // start with a small child list (initial capacity 4)
    this.childNodes = new ArrayList<Node>(4);
}
/**
 * Create a new Node with the given base URI and a newly constructed {@code Attributes} instance.
 * Delegates to {@code Node(String, Attributes)}.
 * @param baseUri base URI
 */
protected Node(String baseUri) {
this(baseUri, new Attributes());
}
/**
* Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
*/
protected Node() {
// Collections.emptyList() is an immutable list, so children cannot be added to a node built this way
childNodes = Collections.emptyList();
// no attribute holder at all; the base URI is likewise left unset
attributes = null;
}
/**
Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
@return node name
*/
public abstract String nodeName();
/**
* Get an attribute's value by its key.
* <p/>
* To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs</b></code>,
* which is a shortcut to the {@link #absUrl} method.
* E.g.: <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
* @param attributeKey The attribute key.
* @return The attribute, or empty string if not present (to avoid nulls).
* @see #attributes()
* @
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> before this node
* @return this node, for chaining
* @see #after(Node)
*/
public Node before(Node node) {
    Validate.notNull(node);
    Validate.notNull(parentNode);
    // inserting at this node's own index places the new node directly ahead of it
    final int insertAt = siblingIndex();
    parentNode.addChildren(insertAt, node);
    return this;
}
/**
* Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
* @param html HTML to add after this node
* @return this node, for chaining
* @see #before(String)
*/
public Node after(String html) {
    // one past this node's index, i.e. the slot of the following sibling
    final int insertAt = siblingIndex() + 1;
    addSiblingHtml(insertAt, html);
    return this;
}
/**
* Insert the specified node into the DOM after this node (i.e. as a following sibling).
* @param node to add after this node
* @return this node, for chaining
* @see #before(Node)
*/
public Node after(Node node) {
    Validate.notNull(node);
    Validate.notNull(parentNode);
    // one past this node's index, i.e. the slot of the following sibling
    final int insertAt = siblingIndex() + 1;
    parentNode.addChildren(insertAt, node);
    return this;
}
/**
 * Parses the HTML as a fragment and inserts the resulting nodes into this node's parent at the given index.
 * The parent is used as the parse context when it is an Element; otherwise the context is null.
 * @param index position in the parent's children at which to insert
 * @param html HTML to parse and insert (must not be null)
 */
private void addSiblingHtml(int index, String html) {
    Validate.notNull(html);
    Validate.notNull(parentNode);
    Element context = null;
    if (parent() instanceof Element) {
        context = (Element) parent();
    }
    final List<Node> parsed = Parser.parseFragment(html, context, baseUri());
    final Node[] toInsert = parsed.toArray(new Node[parsed.size()]);
    parentNode.addChildren(index, toInsert);
}
/**
Wrap the supplied HTML around this node.
@param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
@return this node, for chaining.
*/
public Node wrap(String html) {
Validate.notEmpty(html);
Element context = parent() instanceof Element ? (Element) parent() : null;
List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri());
Node wrapNode = wrapChildren.get(0);
if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
return null;
Element wrap = (Element) wrapNode;
Element deepest = getDeepChild(wrap);
parentNode.replaceChild(this, wrap);
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> 1);
for (Node node: nodes)
if (node != this)
siblings.add(node);
return siblings;
}
/**
Get this node's next sibling.
@return next sibling, or null if this is the last sibling
*/
public Node nextSibling() {
    if (parentNode == null) {
        return null; // root
    }
    final List<Node> siblings = parentNode.childNodes;
    // siblingIndex() returns a primitive int, so it is used directly: no boxing, and no null check that could never fail
    final int next = siblingIndex() + 1;
    // the last sibling has nothing after it
    return siblings.size() > next ? siblings.get(next) : null;
}
/**
Get this node's previous sibling.
@return the previous sibling, or null if this is the first sibling
*/
public Node previousSibling() {
    if (parentNode == null) {
        return null; // root
    }
    final List<Node> siblings = parentNode.childNodes;
    // siblingIndex() returns a primitive int, so it is used directly: no boxing, and no null check that could never fail
    final int index = siblingIndex();
    // the first sibling has nothing before it
    return index > 0 ? siblings.get(index - 1) : null;
}
/**
* Get the list index of this node in its node sibling list. I.e. if this is the first node
* sibling, returns 0.
* @return position in node sibling list
* @see org.jsoup.nodes.Element#elementSiblingIndex()
*/
public int siblingIndex() {
// plain accessor: returns the siblingIndex field as currently stored
return siblingIndex;
}
/**
 * Store the sibling index of this node, i.e. the value later reported by {@code siblingIndex()}.
 * @param siblingIndex the index to record
 */
protected void setSiblingIndex(int siblingIndex) {
this.siblingIndex = siblingIndex;
}
/**
* Perform a depth-first traversal through this node and its descendants.
* @param nodeVisitor the visitor callbacks to perform on each node
* @return this node, for chaining
*/
public Node traverse(NodeVisitor nodeVisitor) {
    Validate.notNull(nodeVisitor);
    // wrap the visitor in a traversor and start the walk at this node
    new NodeTraversor(nodeVisitor).traverse(this);
    return this;
}
/**
Get the outer HTML of this node.
@return HTML
*/
public String outerHtml() {
    // 128 is the builder's initial capacity; the single-argument overload fills the builder
    final StringBuilder html = new StringBuilder(128);
    outerHtml(html);
    return html.toString();
}
protected void outerHtml(StringBuilder accum) {
new NodeTraversor(new OuterHtmlVisitor(accum, getOutput
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>Settings())).traverse(this);
}
// if this node has no document (or parent), retrieve the default output settings
private Document.OutputSettings getOutputSettings() {
    // prefer the settings of the document that owns this node
    if (ownerDocument() != null) {
        return ownerDocument().outputSettings();
    }
    // no owner: fall back to the settings of a throwaway empty document
    return (new Document("")).outputSettings();
}
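/**
Write the head part of this node's outer HTML into the accumulator. Called from OuterHtmlVisitor.head; outerHtmlTail is the counterpart called from OuterHtmlVisitor.tail.
@param accum accumulator to place HTML into
@param depth depth value supplied by the traversal
@param out output settings to render with
*/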
abstract void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out);
abstract void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out);
/**
 * String form of this node.
 * @return the result of {@code outerHtml()}
 */
public String toString() {
return outerHtml();
}
/**
 * Appends a newline followed by padding whose width is {@code depth * out.indentAmount()}.
 * @param accum builder to append to
 * @param depth depth multiplier for the indent
 * @param out output settings supplying the per-level indent amount
 */
protected void indent(StringBuilder accum, int depth, Document.OutputSettings out) {
    accum.append("\n");
    final int width = depth * out.indentAmount();
    accum.append(StringUtil.padding(width));
}
/**
 * Identity comparison: a node is equal only to itself.
 * @param o object to compare against
 * @return true only when {@code o} is this very instance
 */
@Override
public boolean equals(Object o) {
    // todo: have nodes hold a child index, compare against that and parent (not children)
    return this == o;
}
/**
 * Hash built from the parent's hash and the attributes' hash; a missing parent or missing
 * attributes contributes 0.
 * @return 31 * parentHash + attributesHash
 */
@Override
public int hashCode() {
    int parentHash = 0;
    if (parentNode != null) {
        parentHash = parentNode.hashCode();
    }
    // not children, or will block stack as they go back up to parent)
    int attributeHash = 0;
    if (attributes != null) {
        attributeHash = attributes.hashCode();
    }
    return 31 * parentHash + attributeHash;
}
/**
* Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
* parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
* original node.
* <p>
* The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
* @return stand-alone cloned node
*/
@Override
public Node clone() {
// passing a null parent to doClone yields an orphan copy (doClone assigns the given parent to the clone)
return doClone(null);
}
protected Node doClone(Node parent) {
Node clone;
try {
clone = (Node) super.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
clone.parentNode = parent; // can be null, to create an orphan split
clone.siblingIndex = parent == null ? 0 : siblingIndex;
clone.attributes = attributes != null ? attributes.clone
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>() : null;
clone.baseUri = baseUri;
clone.childNodes = new ArrayList<Node>(childNodes.size());
for (Node child: childNodes)
clone.childNodes.add(child.doClone(clone)); // clone() creates orphans, doClone() keeps parent
return clone;
}
/**
 * Node visitor that renders outer HTML by forwarding each head/tail callback to the visited
 * node's outerHtmlHead / outerHtmlTail, all writing into one shared accumulator.
 */
private static class OuterHtmlVisitor implements NodeVisitor {
    private StringBuilder accum;
    private Document.OutputSettings out;

    OuterHtmlVisitor(StringBuilder accum, Document.OutputSettings out) {
        this.out = out;
        this.accum = accum;
    }

    public void head(Node node, int depth) {
        node.outerHtmlHead(accum, depth, out);
    }

    public void tail(Node node, int depth) {
        final boolean isText = node.nodeName().equals("#text");
        if (isText) {
            return; // saves a void hit.
        }
        node.outerHtmlTail(accum, depth, out);
    }
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.select;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Element;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Evaluates that an element matches the selector.
*/
public abstract class Evaluator {
protected Evaluator() {
}
/**
* Test if the element meets the evaluator's requirements.
*
* @param root Root of the matching subtree
* @param element tested element
*/
public abstract boolean matches(Element root, Element element);
/**
 * Evaluator that accepts an element when its tag name equals the configured tag name.
 */
public static final class Tag extends Evaluator {
    private String tagName;

    /**
     * @param tagName tag name an element must have to match
     */
    public Tag(String tagName) {
        this.tagName = tagName;
    }

    @Override
    public boolean matches(Element root, Element element) {
        // exact (case-sensitive) comparison of the element's tag name with the wanted one
        String actualTag = element.tagName();
        return actualTag.equals(this.tagName);
    }

    @Override
    public String toString() {
        return String.format("%s", this.tagName);
    }
}
/**
 * Evaluator that accepts an element when its id equals the configured id.
 */
public static final class Id extends Evaluator {
    private String id;

    /**
     * @param id id value an element must carry to match
     */
    public Id(String id) {
        this.id = id;
    }

    @Override
    public boolean matches(Element root, Element element) {
        // the configured id is the receiver of equals(), the element's id is the argument
        String elementId = element.id();
        return this.id.equals(elementId);
    }

    @Override
    public String toString() {
        return String.format("#%s", this.id);
    }
}
/**
 * Evaluator that accepts an element when {@code element.hasClass(className)} is true.
 */
public static final class Class extends Evaluator {
    private String className;

    /**
     * @param className class name to look for on the element
     */
    public Class(String className) {
        this.className = className;
    }

    @Override
    public boolean matches(Element root, Element element) {
        boolean present = element.hasClass(this.className);
        return present;
    }

    @Override
    public String toString() {
        return String.format(".%s", this.className);
    }
}
/**
* Evaluator for attribute name matching
*/
public static final class Attribute extends Evaluator {
private String key;
public Attribute(String key) {
this.key = key;
}
@Override
public boolean matches(Element root, Element element) {
return element.hasAttr(key);
}
@Override
public String toString() {
return String.format("[%s]", key);
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
}
}
/**
 * Evaluator that accepts an element when at least one of its attribute names
 * starts with the configured prefix.
 */
public static final class AttributeStarting extends Evaluator {
    private String keyPrefix;

    /**
     * @param keyPrefix prefix an attribute name must start with
     */
    public AttributeStarting(String keyPrefix) {
        this.keyPrefix = keyPrefix;
    }

    @Override
    public boolean matches(Element root, Element element) {
        // scan every attribute; the first key carrying the prefix settles the match
        for (org.jsoup.nodes.Attribute candidate : element.attributes().asList()) {
            String attributeName = candidate.getKey();
            if (attributeName.startsWith(keyPrefix)) {
                return true;
            }
        }
        return false;
    }

    @Override
    public String toString() {
        return String.format("[^%s]", this.keyPrefix);
    }
}
/**
 * Evaluator that accepts an element when it has the attribute and the attribute's
 * value equals the configured value, ignoring case.
 */
public static final class AttributeWithValue extends AttributeKeyPair {
    public AttributeWithValue(String key, String value) {
        super(key, value);
    }

    @Override
    public boolean matches(Element root, Element element) {
        // the attribute must be present before its value is compared
        if (!element.hasAttr(key)) {
            return false;
        }
        String actual = element.attr(key);
        return value.equalsIgnoreCase(actual);
    }

    @Override
    public String toString() {
        return String.format("[%s=%s]", key, value);
    }
}
/**
 * Evaluator that accepts an element when the attribute's value is not equal
 * (ignoring case) to the configured value.
 */
public static final class AttributeWithValueNot extends AttributeKeyPair {
    public AttributeWithValueNot(String key, String value) {
        super(key, value);
    }

    @Override
    public boolean matches(Element root, Element element) {
        // no presence check is made here: only the value returned by attr(key) is compared
        String actual = element.attr(key);
        boolean sameValue = value.equalsIgnoreCase(actual);
        return !sameValue;
    }

    @Override
    public String toString() {
        return String.format("[%s!=%s]", key, value);
    }
}
/**
 * Evaluator that accepts an element when it has the attribute and the lower-cased
 * attribute value starts with the configured value.
 */
public static final class AttributeWithValueStarting extends AttributeKeyPair {
    public AttributeWithValueStarting(String key, String value) {
        super(key, value);
    }

    @Override
    public boolean matches(Element root, Element element) {
        if (!element.hasAttr(key)) {
            return false;
        }
        // only the element side needs lower-casing: value is lower case already
        String actual = element.attr(key).toLowerCase();
        return actual.startsWith(value);
    }

    @Override
    public String toString() {
        return String.format("[%s^=%s]", key, value);
    }
}
/**
* Evaluator for attribute name
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>/value matching (value ending)
*/
public static final class AttributeWithValueEnding extends AttributeKeyPair {
    // Accepts an element when it has the attribute and the lower-cased attribute
    // value ends with the configured value.
    public AttributeWithValueEnding(String key, String value) {
        super(key, value);
    }

    @Override
    public boolean matches(Element root, Element element) {
        if (!element.hasAttr(key)) {
            return false;
        }
        // only the element side needs lower-casing: value is lower case
        String actual = element.attr(key).toLowerCase();
        return actual.endsWith(value);
    }

    @Override
    public String toString() {
        return String.format("[%s$=%s]", key, value);
    }
}
/**
 * Evaluator that accepts an element when it has the attribute and the lower-cased
 * attribute value contains the configured value.
 */
public static final class AttributeWithValueContaining extends AttributeKeyPair {
    public AttributeWithValueContaining(String key, String value) {
        super(key, value);
    }

    @Override
    public boolean matches(Element root, Element element) {
        if (!element.hasAttr(key)) {
            return false;
        }
        // only the element side needs lower-casing: value is lower case
        String actual = element.attr(key).toLowerCase();
        return actual.contains(value);
    }

    @Override
    public String toString() {
        return String.format("[%s*=%s]", key, value);
    }
}
/**
 * Evaluator that accepts an element when it has the attribute and the regex
 * finds a match somewhere in the attribute's value.
 */
public static final class AttributeWithValueMatching extends Evaluator {
    String key;
    Pattern pattern;

    /**
     * @param key attribute name; stored trimmed and lower-cased
     * @param pattern regex applied to the attribute value
     */
    public AttributeWithValueMatching(String key, Pattern pattern) {
        String normalisedKey = key.trim().toLowerCase();
        this.key = normalisedKey;
        this.pattern = pattern;
    }

    @Override
    public boolean matches(Element root, Element element) {
        if (!element.hasAttr(key)) {
            return false;
        }
        // find() rather than matches(): a partial hit inside the value is enough
        String actual = element.attr(key);
        return pattern.matcher(actual).find();
    }

    @Override
    public String toString() {
        return String.format("[%s~=%s]", key, pattern.toString());
    }
}
/**
 * Abstract base for evaluators that compare an attribute name and value.
 * Both the key and the value are stored trimmed and lower-cased.
 */
public abstract static class AttributeKeyPair extends Evaluator {
    String key;
    String value;

    /**
     * @param key attribute name; validated with {@code Validate.notEmpty}
     * @param value attribute value; validated with {@code Validate.notEmpty}
     */
    public AttributeKeyPair(String key, String value) {
        // both arguments are validated before either one is normalised
        Validate.notEmpty(key);
        Validate.notEmpty(value);

        String normalisedKey = key.trim().toLowerCase();
        String normalisedValue = value.trim().toLowerCase();
        this.key = normalisedKey;
        this.value = normalisedValue;
    }
}
/**
* Evaluator for any / all element matching
*/
public static final class AllElements extends Evaluator {
@Override
public boolean matches(Element root, Element element)
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> {
return true;
}
@Override
public String toString() {
return "*";
}
}
/**
 * Evaluator that accepts an element whose sibling index is below the configured index (e &lt; idx).
 */
public static final class IndexLessThan extends IndexEvaluator {
    public IndexLessThan(int index) {
        super(index);
    }

    @Override
    public boolean matches(Element root, Element element) {
        // same comparison as "siblingIndex < index", written from the index's side
        return index > element.elementSiblingIndex();
    }

    @Override
    public String toString() {
        return String.format(":lt(%d)", index);
    }
}
/**
 * Evaluator that accepts an element whose sibling index is above the configured index (e &gt; idx).
 */
public static final class IndexGreaterThan extends IndexEvaluator {
    public IndexGreaterThan(int index) {
        super(index);
    }

    @Override
    public boolean matches(Element root, Element element) {
        // same comparison as "siblingIndex > index", written from the index's side
        return index < element.elementSiblingIndex();
    }

    @Override
    public String toString() {
        return String.format(":gt(%d)", index);
    }
}
/**
 * Evaluator that accepts an element whose sibling index equals the configured index (e = idx).
 */
public static final class IndexEquals extends IndexEvaluator {
    public IndexEquals(int index) {
        super(index);
    }

    @Override
    public boolean matches(Element root, Element element) {
        // numeric equality between the configured index and the element's sibling index
        return index == element.elementSiblingIndex();
    }

    @Override
    public String toString() {
        return String.format(":eq(%d)", index);
    }
}
/**
* Abstract base for evaluators that match on sibling index.
* It only stores the index; the comparison itself is written by each subclass
* (for example {@code IndexLessThan}, {@code IndexGreaterThan} and {@code IndexEquals}).
*
* @author ant
*/
public abstract static class IndexEvaluator extends Evaluator {
// index that subclasses compare an element's sibling index against
int index;
/**
* @param index the index to compare sibling positions with
*/
public IndexEvaluator(int index) {
this.index = index;
}
}
/**
 * Evaluator for matching Element (and its descendants) text.
 * The comparison is made on lower-cased text on both sides.
 */
public static final class ContainsText extends Evaluator {
    private String searchText;

    /**
     * @param searchText text to look for; stored lower-cased
     */
    public ContainsText(String searchText) {
        this.searchText = searchText.toLowerCase();
    }

    @Override
    public boolean matches(Element root, Element element) {
        // searchText was lower-cased in the constructor, so only the element text is lowered here
        return (element.text().toLowerCase().contains(searchText));
    }

    /**
     * @return the selector form of this evaluator, e.g. {@code :contains(text)}
     */
    @Override
    public String toString() {
        // the closing parenthesis was missing, producing an unbalanced ":contains(text"
        return String.format(":contains(%s)", searchText);
    }
}
/**
* Evaluator for matching Element's own text
*/
public static final class
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> ContainsOwnText extends Evaluator {
private String searchText;
public ContainsOwnText(String searchText) {
this.searchText = searchText.toLowerCase();
}
@Override
public boolean matches(Element root, Element element) {
return (element.ownText().toLowerCase().contains(searchText));
}
@Override
public String toString() {
    // the closing parenthesis was missing, producing an unbalanced ":containsOwn(text"
    return String.format(":containsOwn(%s)", searchText);
}
}
/**
 * Evaluator for matching Element (and its descendants) text with regex.
 */
public static final class Matches extends Evaluator {
    private Pattern pattern;

    /**
     * @param pattern regex searched for in the element's text
     */
    public Matches(Pattern pattern) {
        this.pattern = pattern;
    }

    @Override
    public boolean matches(Element root, Element element) {
        // find() accepts a partial hit anywhere in the text
        Matcher m = pattern.matcher(element.text());
        return m.find();
    }

    /**
     * @return the selector form of this evaluator, e.g. {@code :matches(regex)}
     */
    @Override
    public String toString() {
        // the closing parenthesis was missing, producing an unbalanced ":matches(regex"
        return String.format(":matches(%s)", pattern);
    }
}
/**
 * Evaluator for matching Element's own text with regex.
 */
public static final class MatchesOwn extends Evaluator {
    private Pattern pattern;

    /**
     * @param pattern regex searched for in the element's own text
     */
    public MatchesOwn(Pattern pattern) {
        this.pattern = pattern;
    }

    @Override
    public boolean matches(Element root, Element element) {
        // find() accepts a partial hit anywhere in the own text
        Matcher m = pattern.matcher(element.ownText());
        return m.find();
    }

    /**
     * @return the selector form of this evaluator, e.g. {@code :matchesOwn(regex)}
     */
    @Override
    public String toString() {
        // the closing parenthesis was missing, producing an unbalanced ":matchesOwn(regex"
        return String.format(":matchesOwn(%s)", pattern);
    }
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.select;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.parser.TokenQueue;
/**
* Parses a CSS selector into an Evaluator tree.
*/
class QueryParser {
private final static String[] combinators = {",", ">", "+", "~", " "};
private TokenQueue tq;
private String query;
private List<Evaluator> evals = new ArrayList<Evaluator>();
/**
 * Create a new QueryParser, keeping the raw query and a token queue built over it.
 * @param query CSS query
 */
private QueryParser(String query) {
    this.tq = new TokenQueue(query);
    this.query = query;
}
/**
 * Parse a CSS query into an Evaluator, using a fresh parser instance for the query.
 * @param query CSS query
 * @return Evaluator
 */
public static Evaluator parse(String query) {
    return new QueryParser(query).parse();
}
/**
 * Parse the query held by this parser.
 * A single collected evaluator is returned as-is; several are wrapped in an AND.
 * @return Evaluator
 */
Evaluator parse() {
    tq.consumeWhitespace();

    if (!tq.matchesAny(combinators)) {
        findElements();
    } else {
        // if starts with a combinator, use root as elements
        evals.add(new StructuralEvaluator.Root());
        combinator(tq.consume());
    }

    // hierarchy and extras
    while (!tq.isEmpty()) {
        boolean sawWhitespace = tq.consumeWhitespace();

        if (tq.matchesAny(combinators)) {
            combinator(tq.consume());
            continue;
        }

        if (sawWhitespace) {
            // bare whitespace between selectors is handled as the ' ' combinator
            combinator(' ');
        } else {
            // E.class, E#id, E[attr] etc. AND: take next el, #. etc off queue
            findElements();
        }
    }

    return evals.size() == 1 ? evals.get(0) : new CombiningEvaluator.And(evals);
}
private void combinator(char combinator) {
tq.consumeWhitespace();
String subQuery = consumeSubQuery(); // support multi > childs
Evaluator rootEval; // the new topmost evaluator
Evaluator currentEval; // the evaluator the
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> new eval will be combined to. could be root, or rightmost or.
Evaluator newEval = parse(subQuery); // the evaluator to add into target evaluator
boolean replaceRightMost = false;
if (evals.size() == 1) {
rootEval = currentEval = evals.get(0);
// make sure OR (,) has precedence:
if (rootEval instanceof CombiningEvaluator.Or && combinator != ',') {
currentEval = ((CombiningEvaluator.Or) currentEval).rightMostEvaluator();
replaceRightMost = true;
}
}
else {
rootEval = currentEval = new CombiningEvaluator.And(evals);
}
evals.clear();
// for most combinators: change the current eval into an AND of the current eval and the new eval
if (combinator == '>')
currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.ImmediateParent(currentEval));
else if (combinator == ' ')
currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.Parent(currentEval));
else if (combinator == '+')
currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.ImmediatePreviousSibling(currentEval));
else if (combinator == '~')
currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.PreviousSibling(currentEval));
else if (combinator == ',') { // group or.
CombiningEvaluator.Or or;
if (currentEval instanceof CombiningEvaluator.Or) {
or = (CombiningEvaluator.Or) currentEval;
or.add(newEval);
} else {
or = new CombiningEvaluator.Or();
or.add(currentEval);
or.add(newEval);
}
currentEval = or;
}
else
throw new Selector.SelectorParseException("Unknown combinator: " + combinator);
if (replaceRightMost)
((CombiningEvaluator.Or) rootEval).replaceRightMostEvaluator(currentEval);
else rootEval = currentEval;
evals.add(rootEval);
}
private String consumeSubQuery() {
StringBuilder sq = new StringBuilder();
while (!tq.isEmpty()) {
if (tq.matches("("))
sq.append("(").append(tq.chompBal
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>anced('(', ')')).append(")");
else if (tq.matches("["))
sq.append("[").append(tq.chompBalanced('[', ']')).append("]");
else if (tq.matchesAny(combinators))
break;
else
sq.append(tq.consume());
}
return sq.toString();
}
/**
 * Looks at the head of the token queue and hands off to the handler for the
 * first selector form that matches. The checks run in a fixed order and the
 * first hit wins.
 *
 * @throws Selector.SelectorParseException if no recognised selector form is at the head of the queue
 */
private void findElements() {
    if (tq.matchChomp("#")) {
        byId();
        return;
    }
    if (tq.matchChomp(".")) {
        byClass();
        return;
    }
    if (tq.matchesWord()) {
        byTag();
        return;
    }
    if (tq.matches("[")) {
        byAttribute();
        return;
    }
    if (tq.matchChomp("*")) {
        allElements();
        return;
    }

    // index pseudo selectors
    if (tq.matchChomp(":lt(")) {
        indexLessThan();
        return;
    }
    if (tq.matchChomp(":gt(")) {
        indexGreaterThan();
        return;
    }
    if (tq.matchChomp(":eq(")) {
        indexEquals();
        return;
    }

    // the handlers below consume their own ":name" prefix, so only test for it here
    if (tq.matches(":has(")) {
        has();
        return;
    }
    if (tq.matches(":contains(")) {
        contains(false);
        return;
    }
    if (tq.matches(":containsOwn(")) {
        contains(true);
        return;
    }
    if (tq.matches(":matches(")) {
        matches(false);
        return;
    }
    if (tq.matches(":matchesOwn(")) {
        matches(true);
        return;
    }
    if (tq.matches(":not(")) {
        not();
        return;
    }

    // nothing recognised
    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
}
/**
 * Reads a CSS identifier from the queue, requires it to be non-empty, and
 * registers an {@code Evaluator.Id} for it.
 */
private void byId() {
    final String elementId = tq.consumeCssIdentifier();
    Validate.notEmpty(elementId);

    evals.add(new Evaluator.Id(elementId));
}
/**
 * Reads a CSS identifier from the queue, requires it to be non-empty, and
 * registers an {@code Evaluator.Class} for its trimmed, lower-cased form.
 */
private void byClass() {
    final String rawName = tq.consumeCssIdentifier();
    Validate.notEmpty(rawName);

    final String normalisedName = rawName.trim().toLowerCase();
    evals.add(new Evaluator.Class(normalisedName));
}
/**
 * Reads an element selector from the queue, requires it to be non-empty, and
 * registers an {@code Evaluator.Tag} for its normalised name.
 */
private void byTag() {
    final String selectorName = tq.consumeElementSelector();
    Validate.notEmpty(selectorName);

    // namespaces: an element named "abc:def" is selected with "abc|def", so flip the
    // separator. replace() leaves the name as-is when there is no '|'.
    final String elementName = selectorName.replace("|", ":").trim().toLowerCase();
    evals.add(new Evaluator.Tag(elementName));
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
/**
 * Parses a bracketed attribute selector and registers the matching evaluator.
 * The text between the balanced brackets is split into a key and, if present,
 * one of the operators {@code = != ^= $= *= ~=} followed by a value.
 *
 * @throws Selector.SelectorParseException if text follows the key but none of the operators matches
 */
private void byAttribute() {
    final TokenQueue content = new TokenQueue(tq.chompBalanced('[', ']')); // text between the brackets
    // key runs up to the first operator: eq, not, start, end, contain, match (or to the end if no value)
    final String attrKey = content.consumeToAny("=", "!=", "^=", "$=", "*=", "~=");
    Validate.notEmpty(attrKey);
    content.consumeWhitespace();

    if (content.isEmpty()) {
        // key only: a leading '^' turns it into a key-prefix test
        if (attrKey.startsWith("^")) {
            evals.add(new Evaluator.AttributeStarting(attrKey.substring(1)));
        } else {
            evals.add(new Evaluator.Attribute(attrKey));
        }
        return;
    }

    // key followed by an operator; whatever remains after the operator is the value
    if (content.matchChomp("=")) {
        evals.add(new Evaluator.AttributeWithValue(attrKey, content.remainder()));
    } else if (content.matchChomp("!=")) {
        evals.add(new Evaluator.AttributeWithValueNot(attrKey, content.remainder()));
    } else if (content.matchChomp("^=")) {
        evals.add(new Evaluator.AttributeWithValueStarting(attrKey, content.remainder()));
    } else if (content.matchChomp("$=")) {
        evals.add(new Evaluator.AttributeWithValueEnding(attrKey, content.remainder()));
    } else if (content.matchChomp("*=")) {
        evals.add(new Evaluator.AttributeWithValueContaining(attrKey, content.remainder()));
    } else if (content.matchChomp("~=")) {
        evals.add(new Evaluator.AttributeWithValueMatching(attrKey, Pattern.compile(content.remainder())));
    } else {
        throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, content.remainder());
    }
}
/**
 * Registers an {@code Evaluator.AllElements}.
 */
private void allElements() {
    final Evaluator.AllElements everything = new Evaluator.AllElements();
    evals.add(everything);
}
// pseudo selectors :lt, :gt, :eq
/**
 * Reads the index argument from the queue and registers an {@code Evaluator.IndexLessThan} for it.
 */
private void indexLessThan() {
    final int bound = consumeIndex();
    evals.add(new Evaluator.IndexLessThan(bound));
}
/**
 * Reads the index argument from the queue and registers an {@code Evaluator.IndexGreaterThan} for it.
 */
private void indexGreaterThan() {
    final int bound = consumeIndex();
    evals.add(new Evaluator.IndexGreaterThan(bound));
}
/**
 * Reads the index argument from the queue and registers an {@code Evaluator.IndexEquals} for it.
 */
private void indexEquals() {
    final int position = consumeIndex();
    evals.add(new Evaluator.IndexEquals(position));
}
private int consumeIndex() {
String indexS = tq.chompTo(")").trim();
Validate.isTrue(StringUtil.isNumeric(index
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>S), "Index must be numeric");
return Integer.parseInt(indexS);
}
/**
 * Pseudo selector {@code :has(el)}. Consumes the {@code :has} prefix, takes the
 * balanced parenthesised sub-query, parses it, and registers a
 * {@code StructuralEvaluator.Has} wrapping the parsed result.
 */
private void has() {
    tq.consume(":has");

    final String innerQuery = tq.chompBalanced('(', ')');
    Validate.notEmpty(innerQuery, ":has(el) subselect must not be empty");

    evals.add(new StructuralEvaluator.Has(parse(innerQuery)));
}
/**
 * Pseudo selectors {@code :contains(text)} and {@code :containsOwn(text)}.
 * Consumes the prefix, unescapes the balanced parenthesised text, and registers
 * the corresponding text evaluator.
 *
 * @param own {@code true} for {@code :containsOwn}, {@code false} for {@code :contains}
 */
private void contains(boolean own) {
    if (own) {
        tq.consume(":containsOwn");
    } else {
        tq.consume(":contains");
    }

    final String needle = TokenQueue.unescape(tq.chompBalanced('(', ')'));
    Validate.notEmpty(needle, ":contains(text) query must not be empty");

    if (!own) {
        evals.add(new Evaluator.ContainsText(needle));
    } else {
        evals.add(new Evaluator.ContainsOwnText(needle));
    }
}
/**
 * Pseudo selectors {@code :matches(regex)} and {@code :matchesOwn(regex)}.
 * Consumes the prefix, compiles the balanced parenthesised text as a regular
 * expression, and registers the corresponding evaluator.
 *
 * @param own {@code true} for {@code :matchesOwn}, {@code false} for {@code :matches}
 */
private void matches(boolean own) {
    if (own) {
        tq.consume(":matchesOwn");
    } else {
        tq.consume(":matches");
    }

    // deliberately not unescaped: the regex carries its own escapes
    final String expression = tq.chompBalanced('(', ')');
    Validate.notEmpty(expression, ":matches(regex) query must not be empty");

    final Pattern compiled = Pattern.compile(expression);
    if (!own) {
        evals.add(new Evaluator.Matches(compiled));
    } else {
        evals.add(new Evaluator.MatchesOwn(compiled));
    }
}
/**
 * Pseudo selector {@code :not(selector)}. Consumes the {@code :not} prefix,
 * takes the balanced parenthesised sub-query, parses it, and registers a
 * {@code StructuralEvaluator.Not} wrapping the parsed result.
 */
private void not() {
    tq.consume(":not");

    final String innerQuery = tq.chompBalanced('(', ')');
    Validate.notEmpty(innerQuery, ":not(selector) subselect must not be empty");

    evals.add(new StructuralEvaluator.Not(parse(innerQuery)));
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> id='1' /><abc:def id=2>Foo<p>Hello</p></abc:def><foo:bar>There</foo:bar>";
Document doc = Jsoup.parse(h);
assertEquals("<foo:bar id=\"1\" /><abc:def id=\"2\">Foo<p>Hello</p></abc:def><foo:bar>There</foo:bar>", TextUtil.stripNewlines(doc.body().html()));
}
@Test public void handlesKnownEmptyBlocks() {
    // A known tag is only treated as self closing if it is defined that way; unknown tags may self close.
    Document parsed = Jsoup.parse("<div id='1' /><div id=2><img /><img></div> <hr /> hr text <hr> hr text two");

    // <div /> is treated as <div>..., so it ends up with children
    Element firstDiv = parsed.getElementById("1");
    assertTrue(!firstDiv.children().isEmpty());

    // hr and img stay childless, with or without the trailing solidus
    for (String emptyTag : new String[] {"hr", "img"}) {
        assertTrue(parsed.select(emptyTag).first().children().isEmpty());
        assertTrue(parsed.select(emptyTag).last().children().isEmpty());
    }
}
@Test public void handlesSolidusAtAttributeEnd() {
    // [<a href=/>link</a>] must come out as [<a href="/">link</a>], not as [<a href="" /><a>link</a>]
    Document parsed = Jsoup.parse("<a href=/>link</a>");
    String bodyHtml = parsed.body().html();
    assertEquals("<a href=\"/\">link</a>", bodyHtml);
}
@Test public void handlesMultiClosingBody() {
    // content after repeated </body> / </html> end tags is expected to still land in the body
    Document parsed = Jsoup.parse("<body><p>Hello</body><p>there</p></body></body></html><p>now");
    int paragraphCount = parsed.select("p").size();
    int bodyChildCount = parsed.body().children().size();
    assertEquals(3, paragraphCount);
    assertEquals(3, bodyChildCount);
}
@Test public void handlesUnclosedDefinitionLists() {
// jsoup used to create a <dl>, but that's not to spec
String h = "<dt>Foo<dd>Bar<dt>Qux<dd>Zug";
Document doc = J
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
Document doc = Jsoup.parse(html);
assertEquals("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", doc.childNode(0).outerHtml());
}
@Test public void tracksErrorsWhenRequested() {
    String[] expected = {
        "20: Attributes incorrectly present on end tag",
        "35: Unexpected token [Doctype] when in state [InBody]",
        "36: Invalid character reference: invalid named referenece 'arrgh'",
        "50: Self closing flag not acknowledged",
        "61: Unexpectedly reached end of file (EOF) in input state [TagName]"
    };

    // the limit of 500 is above the number of errors this input is expected to produce
    Parser parser = Parser.htmlParser().setTrackErrors(500);
    Jsoup.parse("<p>One</p href='no'><!DOCTYPE html>&arrgh;<font /><br /><foo", "http://example.com", parser);

    List<ParseError> errors = parser.getErrors();
    assertEquals(expected.length, errors.size());
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], errors.get(i).toString());
    }
}
@Test public void tracksLimitedErrorsWhenRequested() {
    String[] expected = {
        "20: Attributes incorrectly present on end tag",
        "35: Unexpected token [Doctype] when in state [InBody]",
        "36: Invalid character reference: invalid named referenece 'arrgh'"
    };

    // track at most three errors
    Parser parser = Parser.htmlParser().setTrackErrors(3);
    parser.parseInput("<p>One</p href='no'><!DOCTYPE html>&arrgh;<font /><br /><foo", "http://example.com");

    List<ParseError> errors = parser.getErrors();
    assertEquals(expected.length, errors.size());
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], errors.get(i).toString());
    }
}
@Test public void noErrorsByDefault() {
String html = "<p>One
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS></p href='no'>&arrgh;<font /><br /><foo";
Parser parser = Parser.htmlParser();
Document doc = Jsoup.parse(html, "http://example.com", parser);
List<ParseError> errors = parser.getErrors();
assertEquals(0, errors.size());
}
@Test public void handlesCommentsInTable() {
    // a comment placed directly inside a table row must stay inside that row
    Document parsed = Jsoup.parseBodyFragment("<table><tr><td>text</td><!-- Comment --></tr></table>");
    String rendered = TextUtil.stripNewlines(parsed.outerHtml());
    assertEquals("<html><head></head><body><table><tbody><tr><td>text</td><!-- Comment --></tr></tbody></table></body></html>", rendered);
}
@Test public void handlesQuotesInCommentsInScripts() {
    // the script block must come back from the parser unchanged, so the input doubles as the expectation
    String script = "<script>\n" +
        " <!--\n" +
        " document.write('</scr' + 'ipt>');\n" +
        " // -->\n" +
        "</script>";
    Document parsed = Jsoup.parseBodyFragment(script);
    assertEquals(script, parsed.body().html());
}
@Test public void handleNullContextInParseFragment() {
    List<Node> parsed = Parser.parseFragment("<ol><li>One</li></ol><p>Two</p>", null, "http://example.com/");

    // with no context a doc gets created, and the <html> node (not the document) is returned
    assertEquals(1, parsed.size());
    Node htmlNode = parsed.get(0);
    assertEquals("html", htmlNode.nodeName());
    assertEquals("<html> <head></head> <body> <ol> <li>One</li> </ol> <p>Two</p> </body> </html>", StringUtil.normaliseWhitespace(htmlNode.outerHtml()));
}
@Test public void doesNotFindShortestMatchingEntity() {
// previous behaviour was to identify a possible entity, then chomp down the string until a match was found.
// (as defined in html5.) However in practise that lead to spurious matches against the author's intent.
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
*/
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>);
query = query.trim();
Validate.notEmpty(query);
Validate.notNull(root);
this.evaluator = QueryParser.parse(query);
this.root = root;
}
/**
 * Runs a CSS selector query against a single root element.
 *
 * @param query CSS selector
 * @param root element to start the search from
 * @return the elements that match the query
 */
public static Elements select(String query, Element root) {
    final Selector selector = new Selector(query, root);
    return selector.select();
}
/**
 * Runs a CSS selector query against each of several root elements and merges the results.
 *
 * @param query CSS selector; must not be empty
 * @param roots elements to start the search from; must not be null
 * @return the matches across all roots, each element once, in first-found order
 */
public static Elements select(String query, Iterable<Element> roots) {
    Validate.notEmpty(query);
    Validate.notNull(roots);

    // insertion-ordered set: an element reached from more than one root is kept once
    final LinkedHashSet<Element> matched = new LinkedHashSet<Element>();
    for (final Element searchRoot : roots)
        matched.addAll(select(query, searchRoot));

    return new Elements(matched);
}
/**
 * Collects the elements under this selector's root that its evaluator accepts.
 */
private Elements select() {
    final Elements collected = Collector.collect(evaluator, root);
    return collected;
}
/**
 * Exclude set: returns the members of {@code elements} that equal no member of {@code outs}.
 * Package open so that Elements can implement the .not() selector.
 *
 * @param elements candidates, kept in iteration order
 * @param outs elements to exclude
 * @return a new Elements holding the candidates that survived
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    final Elements kept = new Elements();
    nextCandidate:
    for (final Element candidate : elements) {
        for (final Element excluded : outs) {
            if (candidate.equals(excluded))
                continue nextCandidate; // excluded: drop it and move on
        }
        kept.add(candidate);
    }
    return kept;
}
/**
 * Thrown when a selector query cannot be parsed. The message is built from a
 * {@link String#format(String, Object...)} template and its arguments.
 */
public static class SelectorParseException extends IllegalStateException {
    /**
     * @param msg format template for the exception message
     * @param params arguments substituted into the template
     */
    public SelectorParseException(String msg, Object... params) {
        super(render(msg, params));
    }

    // Expands the template; the argument array is passed straight through to String.format.
    private static String render(String template, Object[] arguments) {
        return String.format(template, arguments);
    }
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
return base.containsKey(name);
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
public static String unescapeEntities(String string, boolean inAttribute) {
Tokeniser tokeniser = new Tokeniser(new CharacterReader(string), ParseErrorList.noTracking());
return tokeniser.unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
String unescapeEntities(boolean inAttribute) {
StringBuilder builder = new StringBuilder();
while (!reader.isEmpty()) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
static String escape(String string, Document.OutputSettings out) {
    // Read the encoder and escape mode off the output settings, then delegate to the
    // three-argument overload.
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> a block tag.
*
* @return if block tag
*/
public boolean isBlock() {
    return this.isBlock; // plain accessor for the isBlock flag
}
/**
 * Reports this tag's formatAsBlock flag.
 *
 * @return true if the tag should be formatted as a block, false for inline formatting
 */
public boolean formatAsBlock() {
    return this.formatAsBlock;
}
/**
 * Reports this tag's canContainBlock flag.
 *
 * @return true if block tags may appear inside this tag
 */
public boolean canContainBlock() {
    return this.canContainBlock;
}
/**
 * Reports whether this is an inline tag, defined here as any tag that is not a block tag.
 *
 * @return the negation of the isBlock flag
 */
public boolean isInline() {
    if (isBlock)
        return false;
    return true;
}
/**
 * Reports whether this is a data-only tag: one that cannot contain inline content
 * and is not an empty tag.
 *
 * @return true if this tag is a data only tag
 */
public boolean isData() {
    // De Morgan form of (!canContainInline && !isEmpty()); isEmpty() is still skipped
    // when canContainInline is true.
    return !(canContainInline || isEmpty());
}
/**
 * Reports this tag's empty flag.
 *
 * @return true if this is an empty tag
 */
public boolean isEmpty() {
    return this.empty;
}
/**
 * Reports whether this tag should be output as self closing.
 *
 * @return true when the tag is empty or has been flagged as self closing
 */
public boolean isSelfClosing() {
    if (empty)
        return true;
    return selfClosing;
}
/**
 * Reports whether this tag is pre-defined, as opposed to auto created on parsing,
 * by checking for its name in the map of known tags.
 *
 * @return true if this tag's name is a key of the known-tag map
 */
public boolean isKnownTag() {
    final boolean registered = tags.containsKey(this.tagName);
    return registered;
}
/**
 * Static variant: reports whether the given tag name is a key of the known-tag map.
 *
 * @param tagName name of tag
 * @return true if it is a known HTML tag
 */
public static boolean isKnownTag(String tagName) {
    final boolean registered = tags.containsKey(tagName);
    return registered;
}
/**
 * Reports whether whitespace within child text nodes of this tag should be preserved.
 *
 * @return the preserveWhitespace flag
 */
public boolean preserveWhitespace() {
    return this.preserveWhitespace;
}
Tag setSelfClosing() {
    // Flag the tag as self closing and hand back the same instance so calls can be chained.
    this.selfClosing = true;
    return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Tag)) return false;
Tag tag = (Tag) o;
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // Membership test against the base entity map only.
    final boolean inBase = base.containsKey(name);
    return inBase;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes character references in a string by running it through a throwaway Tokeniser
 * built with a non-tracking error list.
 * @param string text to unescape
 * @param inAttribute forwarded to the tokeniser's unescapeEntities
 * @return the tokeniser's unescaped result
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Reads the reader to exhaustion and returns its content with character references resolved.
 * Text before each '&' is copied through untouched; every '&' is handed to
 * consumeCharacterReference, and a literal '&' is kept whenever that call yields null.
 *
 * @param inAttribute forwarded unchanged to consumeCharacterReference
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' itself
        final Character resolved = consumeCharacterReference(null, inAttribute);
        if (resolved != null)
            out.append(resolved);
        else
            out.append('&'); // not a reference: keep the ampersand literally
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is a key of the full named-entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return true if the full entity map contains {@code name}
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
static String escape(String string, Document.OutputSettings out) {
    // Read the encoder and escape mode off the output settings, then delegate to the
    // three-argument overload.
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.nodes;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
import org.jsoup.select.Collector;
import org.jsoup.select.Elements;
import org.jsoup.select.Evaluator;
import org.jsoup.select.Selector;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
* A HTML element consists of a tag name, attributes, and child nodes (including text nodes and
* other elements).
*
* From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Element extends Node {
private Tag tag;
private Set<String> classNames;
/**
 * Builds a new, standalone Element. (Standalone in that it has no parent.)
 *
 * @param tag tag of this element; must not be null
 * @param baseUri the base URI, handed to the superclass constructor
 * @param attributes initial attributes, handed to the superclass constructor
 * @see #appendChild(Node)
 * @see #appendElement(String)
 */
public Element(Tag tag, String baseUri, Attributes attributes) {
    super(baseUri, attributes);
    // super(...) has to come first, so the tag can only be validated after it.
    Validate.notNull(tag);
    this.tag = tag;
}
/**
 * Builds a new Element from a tag and a base URI, starting with a fresh, empty
 * {@code Attributes} instance.
 *
 * @param tag element tag
 * @param baseUri the base URI of this element. An empty string is acceptable for the
 * base URI, but null is not.
 * @see Tag#valueOf(String)
 */
public Element(Tag tag, String baseUri) {
    this(tag, baseUri, new Attributes());
}
@Override
public String nodeName() {
    // The node name of an element is simply the name of its tag.
    final String name = tag.getName();
    return name;
}
/**
 * Returns the name of this element's tag, e.g. {@code div}.
 *
 * @return the tag name
 */
public String tagName() {
    final String name = this.tag.getName();
    return name;
}
/**
* Change the tag of this element. For example, convert a {@code <span>} to a {@code <
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // Membership test against the base entity map only.
    final boolean inBase = base.containsKey(name);
    return inBase;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes character references in a string by running it through a throwaway Tokeniser
 * built with a non-tracking error list.
 * @param string text to unescape
 * @param inAttribute forwarded to the tokeniser's unescapeEntities
 * @return the tokeniser's unescaped result
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Reads the reader to exhaustion and returns its content with character references resolved.
 * Text before each '&' is copied through untouched; every '&' is handed to
 * consumeCharacterReference, and a literal '&' is kept whenever that call yields null.
 *
 * @param inAttribute forwarded unchanged to consumeCharacterReference
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' itself
        final Character resolved = consumeCharacterReference(null, inAttribute);
        if (resolved != null)
            out.append(resolved);
        else
            out.append('&'); // not a reference: keep the ampersand literally
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is a key of the full named-entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return true if the full entity map contains {@code name}
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
static String escape(String string, Document.OutputSettings out) {
    // Read the encoder and escape mode off the output settings, then delegate to the
    // three-argument overload.
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>h1").attr("id", "header").text("Welcome");}
*/
public Element prependElement(String tagName) {
    // Resolve the tag, build the child against this element's base URI, then prepend it.
    final Tag childTag = Tag.valueOf(tagName);
    final Element created = new Element(childTag, baseUri());
    prependChild(created);
    return created;
}
/**
 * Wraps the given text in a new TextNode and appends that node to this element.
 *
 * @param text the unencoded text to add
 * @return this element
 */
public Element appendText(String text) {
    appendChild(new TextNode(text, baseUri()));
    return this;
}
/**
 * Wraps the given text in a new TextNode and prepends that node to this element.
 *
 * @param text the unencoded text to add
 * @return this element
 */
public Element prependText(String text) {
    prependChild(new TextNode(text, baseUri()));
    return this;
}
/**
 * Parses the supplied HTML as a fragment in the context of this element and appends each
 * resulting node after the existing children.
 * @param html HTML to add inside this element, after the existing HTML; must not be null
 * @return this element
 * @see #html(String)
 */
public Element append(String html) {
    Validate.notNull(html);
    final List<Node> parsed = Parser.parseFragment(html, this, baseUri());
    final Node[] children = parsed.toArray(new Node[parsed.size()]);
    addChildren(children);
    return this;
}
/**
 * Parses the supplied HTML as a fragment in the context of this element and inserts each
 * resulting node at index 0, ahead of the existing children.
 * @param html HTML to add inside this element, before the existing HTML; must not be null
 * @return this element
 * @see #html(String)
 */
public Element prepend(String html) {
    Validate.notNull(html);
    final List<Node> parsed = Parser.parseFragment(html, this, baseUri());
    final Node[] children = parsed.toArray(new Node[parsed.size()]);
    addChildren(0, children);
    return this;
}
/**
* Insert the specified HTML into the DOM before this element (i.e. as a preceding sibling).
*
* @param html HTML to add before this element
* @return this element, for chaining
* @see #after(String)
*/
@Override
public Element before(String html
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // Membership test against the base entity map only.
    final boolean inBase = base.containsKey(name);
    return inBase;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes character references in a string by running it through a throwaway Tokeniser
 * built with a non-tracking error list.
 * @param string text to unescape
 * @param inAttribute forwarded to the tokeniser's unescapeEntities
 * @return the tokeniser's unescaped result
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Reads the reader to exhaustion and returns its content with character references resolved.
 * Text before each '&' is copied through untouched; every '&' is handed to
 * consumeCharacterReference, and a literal '&' is kept whenever that call yields null.
 *
 * @param inAttribute forwarded unchanged to consumeCharacterReference
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' itself
        final Character resolved = consumeCharacterReference(null, inAttribute);
        if (resolved != null)
            out.append(resolved);
        else
            out.append('&'); // not a reference: keep the ampersand literally
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is a key of the full named-entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return true if the full entity map contains {@code name}
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
static String escape(String string, Document.OutputSettings out) {
    // Read the encoder and escape mode off the output settings, then delegate to the
    // three-argument overload.
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>
* <p>
* For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.text()} returns {@code "Hello there now!"}
*
* @return unencoded text, or empty string if none.
* @see #ownText()
* @see #textNodes()
*/
public String text() {
    // Collect the text through the private accumulator overload, then trim the ends.
    final StringBuilder collected = new StringBuilder();
    text(collected);
    final String raw = collected.toString();
    return raw.trim();
}
/**
 * Appends this element's text, and recursively that of its child elements, to the accumulator.
 * Text nodes are appended via appendNormalisedText. Before descending into a block element,
 * a single space is added if the accumulator is non-empty and does not already end in whitespace.
 *
 * @param accum buffer receiving the text
 */
private void text(StringBuilder accum) {
    appendWhitespaceIfBr(this, accum);
    for (final Node child : childNodes) {
        if (child instanceof TextNode) {
            appendNormalisedText(accum, (TextNode) child);
            continue;
        }
        if (!(child instanceof Element))
            continue; // any other node type contributes no text here
        final Element element = (Element) child;
        final boolean needsSeparator =
            accum.length() > 0 && element.isBlock() && !TextNode.lastCharIsWhitespace(accum);
        if (needsSeparator)
            accum.append(" ");
        element.text(accum);
    }
}
/**
 * Returns the text held directly by this element, without the combined text of its descendants.
 * <p>
 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
 * whereas {@code p.text()} returns {@code "Hello there now!"}.
 * The text inside the {@code b} element is left out because it is not a direct child of the {@code p} element.
 *
 * @return unencoded text, trimmed; an empty string if there is none.
 * @see #text()
 * @see #textNodes()
 */
public String ownText() {
    final StringBuilder collected = new StringBuilder();
    ownText(collected);
    final String raw = collected.toString();
    return raw.trim();
}
/**
 * Appends the text of this element's direct TextNode children to the accumulator.
 * Child elements are not descended into; they are only passed to appendWhitespaceIfBr.
 *
 * @param accum buffer receiving the text
 */
private void ownText(StringBuilder accum) {
    for (final Node child : childNodes) {
        if (child instanceof TextNode)
            appendNormalisedText(accum, (TextNode) child);
        else if (child instanceof Element)
            appendWhitespaceIfBr((Element) child, accum);
    }
}
private void appendNormalisedText(StringBuilder accum, TextNode
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // Membership test against the base entity map only.
    final boolean inBase = base.containsKey(name);
    return inBase;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes character references in a string by running it through a throwaway Tokeniser
 * built with a non-tracking error list.
 * @param string text to unescape
 * @param inAttribute forwarded to the tokeniser's unescapeEntities
 * @return the tokeniser's unescaped result
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Reads the reader to exhaustion and returns its content with character references resolved.
 * Text before each '&' is copied through untouched; every '&' is handed to
 * consumeCharacterReference, and a literal '&' is kept whenever that call yields null.
 *
 * @param inAttribute forwarded unchanged to consumeCharacterReference
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' itself
        final Character resolved = consumeCharacterReference(null, inAttribute);
        if (resolved != null)
            out.append(resolved);
        else
            out.append('&'); // not a reference: keep the ampersand literally
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is a key of the full named-entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return true if the full entity map contains {@code name}
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
static String escape(String string, Document.OutputSettings out) {
    // Read the encoder and escape mode off the output settings, then delegate to the
    // three-argument overload.
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> textNode) {
String text = textNode.getWholeText();
if (!preserveWhitespace()) {
text = TextNode.normaliseWhitespace(text);
if (TextNode.lastCharIsWhitespace(accum))
text = TextNode.stripLeadingWhitespace(text);
}
accum.append(text);
}
/**
 * Appends one space to the accumulator when the element's tag is named "br" and the
 * accumulator does not already end in whitespace.
 */
private static void appendWhitespaceIfBr(Element element, StringBuilder accum) {
    final boolean isBr = element.tag.getName().equals("br");
    if (isBr && !TextNode.lastCharIsWhitespace(accum))
        accum.append(" ");
}
boolean preserveWhitespace() {
    // The element's own tag decides first.
    if (tag.preserveWhitespace())
        return true;
    // Otherwise defer to the parent, when there is one.
    return parent() != null && parent().preserveWhitespace();
}
/**
 * Replaces this element's content with a single text node. Existing contents (text or
 * elements) are cleared first.
 * @param text unencoded text; must not be null
 * @return this element
 */
public Element text(String text) {
    Validate.notNull(text);
    empty();
    appendChild(new TextNode(text, baseUri));
    return this;
}
/**
 Tests whether this element holds any text content that is not just whitespace, looking at
 its own text nodes and recursively at its child elements.
 @return true if element has non-blank text content.
 */
public boolean hasText() {
    for (final Node child : childNodes) {
        if (child instanceof TextNode) {
            if (!((TextNode) child).isBlank())
                return true;
        } else if (child instanceof Element && ((Element) child).hasText()) {
            return true;
        }
    }
    return false;
}
/**
* Get the combined data of this element. Data is e.g. the inside of a {@code script} tag.
* @return the data, or empty string if none
*
* @see #dataNodes()
*/
public String data() {
StringBuilder sb = new StringBuilder();
for (Node childNode : childNodes) {
if (childNode instanceof DataNode) {
DataNode data = (DataNode) childNode;
sb.append(data.getWholeData());
} else if (childNode instanceof Element) {
Element element = (Element) childNode;
String elementData = element.data();
sb.append(elementData);
}
}
return sb.toString
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // Membership test against the base entity map only.
    final boolean inBase = base.containsKey(name);
    return inBase;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes character references in a string by running it through a throwaway Tokeniser
 * built with a non-tracking error list.
 * @param string text to unescape
 * @param inAttribute forwarded to the tokeniser's unescapeEntities
 * @return the tokeniser's unescaped result
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Reads the reader to exhaustion and returns its content with character references resolved.
 * Text before each '&' is copied through untouched; every '&' is handed to
 * consumeCharacterReference, and a literal '&' is kept whenever that call yields null.
 *
 * @param inAttribute forwarded unchanged to consumeCharacterReference
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' itself
        final Character resolved = consumeCharacterReference(null, inAttribute);
        if (resolved != null)
            out.append(resolved);
        else
            out.append('&'); // not a reference: keep the ampersand literally
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is a key of the full named-entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return true if the full entity map contains {@code name}
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
static String escape(String string, Document.OutputSettings out) {
    // Read the encoder and escape mode off the output settings, then delegate to the
    // three-argument overload.
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> element's {@code class} attribute.
@param className class name to remove
@return this element
*/
public Element removeClass(String className) {
    Validate.notNull(className);
    // Read the current class names, drop the requested one, and write the set back.
    final Set<String> current = classNames();
    current.remove(className);
    classNames(current);
    return this;
}
/**
 Flips a class name on this element's {@code class} attribute: a name that is absent is
 added, and a name that is present is removed.
 @param className class name to toggle; must not be null
 @return this element
 */
public Element toggleClass(String className) {
    Validate.notNull(className);
    final Set<String> current = classNames();
    if (!current.contains(className))
        current.add(className);
    else
        current.remove(className);
    classNames(current);
    return this;
}
/**
 * Reads the value of a form element (input, textarea, etc). For a {@code textarea} this is
 * the element's text; for anything else it is the {@code value} attribute.
 * @return the value of the form element, or empty string if not set.
 */
public String val() {
    return tagName().equals("textarea") ? text() : attr("value");
}
/**
 * Writes the value of a form element (input, textarea, etc). For a {@code textarea} the
 * element's text is replaced; for anything else the {@code value} attribute is set.
 * @param value value to set
 * @return this element (for chaining)
 */
public Element val(String value) {
    final boolean isTextarea = tagName().equals("textarea");
    if (!isTextarea)
        attr("value", value);
    else
        text(value);
    return this;
}
/**
 * Writes this element's opening tag, with its attributes, to the accumulator. When pretty
 * printing into a non-empty accumulator, an indent is emitted first if this tag, or the
 * parent's tag, formats as a block. A childless self-closing tag is closed with " />",
 * anything else with ">".
 */
void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
    final boolean shouldIndent = accum.length() > 0
        && out.prettyPrint()
        && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock()));
    if (shouldIndent)
        indent(accum, depth, out);
    accum.append("<");
    accum.append(tagName());
    attributes.html(accum, out);
    final boolean selfClose = childNodes.isEmpty() && tag.isSelfClosing();
    accum.append(selfClose ? " />" : ">");
}
void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
if (out.prettyPrint() && !childNodes.isEmpty() && tag.formatAsBlock())
indent(accum, depth, out);
accum.append("</").append(tagName()).append
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // Membership test against the base entity map only.
    final boolean inBase = base.containsKey(name);
    return inBase;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes character references in a string by running it through a throwaway Tokeniser
 * built with a non-tracking error list.
 * @param string text to unescape
 * @param inAttribute forwarded to the tokeniser's unescapeEntities
 * @return the tokeniser's unescaped result
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Reads the reader to exhaustion and returns its content with character references resolved.
 * Text before each '&' is copied through untouched; every '&' is handed to
 * consumeCharacterReference, and a literal '&' is kept whenever that call yields null.
 *
 * @param inAttribute forwarded unchanged to consumeCharacterReference
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' itself
        final Character resolved = consumeCharacterReference(null, inAttribute);
        if (resolved != null)
            out.append(resolved);
        else
            out.append('&'); // not a reference: keep the ampersand literally
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is a key of the full named-entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return true if the full entity map contains {@code name}
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
static String escape(String string, Document.OutputSettings out) {
    // Read the encoder and escape mode off the output settings, then delegate to the
    // three-argument overload.
    final CharsetEncoder encoder = out.encoder();
    final EscapeMode mode = out.escapeMode();
    return escape(string, encoder, mode);
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>(">");
}
}
/**
 * Returns the element's inner HTML, trimmed. E.g. on a {@code <div>} with one empty {@code <p>},
 * the result is {@code <p></p>}. (Whereas {@link #outerHtml()} would give {@code <div><p></p></div>}.)
 *
 * @return String of HTML.
 * @see #outerHtml()
 */
public String html() {
    final StringBuilder buffer = new StringBuilder();
    html(buffer);
    final String rendered = buffer.toString();
    return rendered.trim();
}
/**
 * Appends the outer HTML of every child node, in order, to the accumulator.
 */
private void html(StringBuilder accum) {
    for (final Node child : childNodes) {
        child.outerHtml(accum);
    }
}
/**
 * Replaces this element's inner HTML: the existing content is cleared, then the supplied
 * HTML is parsed and appended.
 * @param html HTML to parse and set into this element
 * @return this element
 * @see #append(String)
 */
public Element html(String html) {
    this.empty();
    this.append(html);
    return this;
}
/**
 * Renders this element as a string by delegating to {@code outerHtml()}.
 * @return the value returned by outerHtml()
 */
@Override
public String toString() {
    return outerHtml();
}
@Override
public boolean equals(Object o) {
    // Identity comparison only: an element is equal to nothing but itself.
    return o == this;
}
@Override
public int hashCode() {
    // todo: fixup, not very useful
    // Combines the superclass hash with the tag's hash (0 when there is no tag).
    final int inherited = super.hashCode();
    final int tagHash = (tag == null) ? 0 : tag.hashCode();
    return 31 * inherited + tagHash;
}
/**
 * Clones this element via {@code super.clone()} and gives the copy its own class-name set.
 *
 * @return the cloned element
 */
@Override
public Element clone() {
    Element clone = (Element) super.clone();
    // The classNames field is copied by reference, so a set already cached on the source
    // would be shared with the clone. Drop it so the two cannot mutate each other's classes.
    // NOTE(review): assumes super.clone() leaves this field as a shallow copy — confirm against Node.clone().
    clone.classNames = null;
    clone.classNames(); // creates linked set of class names from class attribute
    return clone;
}
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // Membership test against the base entity map only.
    final boolean inBase = base.containsKey(name);
    return inBase;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes character references in a string by running it through a throwaway Tokeniser
 * built with a non-tracking error list.
 * @param string text to unescape
 * @param inAttribute forwarded to the tokeniser's unescapeEntities
 * @return the tokeniser's unescaped result
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Reads the reader to exhaustion and returns its content with character references resolved.
 * Text before each '&' is copied through untouched; every '&' is handed to
 * consumeCharacterReference, and a literal '&' is kept whenever that call yields null.
 *
 * @param inAttribute forwarded unchanged to consumeCharacterReference
 * @return the accumulated text
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder out = new StringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' itself
        final Character resolved = consumeCharacterReference(null, inAttribute);
        if (resolved != null)
            out.append(resolved);
        else
            out.append('&'); // not a reference: keep the ampersand literally
    }
    return out.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is a key of the full named-entity map.
 * @param name candidate entity name (e.g. "lt" or "amp")
 * @return true if the full entity map contains {@code name}
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the character a named entity stands for in the full entity map.
 * @param name named entity (e.g. "lt" or "amp")
 * @return the mapped Character (e.g. '<' or '&'), i.e. whatever the map holds for {@code name}
 */
public static Character getCharacterByName(String name) {
    final Character mapped = full.get(name);
    return mapped;
}
/**
 * Escapes a string with the encoder and escape mode taken from the given output settings.
 * @param string text to escape
 * @param out output settings supplying the encoder and the escape mode
 * @return the result of the three-argument escape overload
 */
static String escape(String string, Document.OutputSettings out) {
    return escape(
            string,
            out.encoder(),
            out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.parser;
import org.jsoup.helper.DescendableLinkedList;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for tree builders. Holds the reader, tokeniser, document and open-element stack,
 * and pumps tokens from the tokeniser into {@link #process(Token)} until an EOF token is seen.
 * @author Jonathan Hedley
 */
abstract class TreeBuilder {
    CharacterReader reader;
    Tokeniser tokeniser;
    protected Document doc; // current doc we are building into
    protected DescendableLinkedList<Element> stack; // the stack of open elements
    protected String baseUri; // current base uri, for creating new elements
    protected Token currentToken; // currentToken is used only for error tracking.
    protected ParseErrorList errors; // null when not tracking errors

    /**
     * Resets the builder state for a fresh parse of the given input.
     * @param input the text to parse; must not be null
     * @param baseUri base URI for the new document; must not be null
     * @param errors error list handed to the tokeniser and stored on this builder
     */
    protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
        Validate.notNull(input, "String input must not be null");
        Validate.notNull(baseUri, "BaseURI must not be null");

        this.doc = new Document(baseUri);
        this.reader = new CharacterReader(input);
        this.errors = errors;
        this.tokeniser = new Tokeniser(this.reader, errors);
        this.stack = new DescendableLinkedList<Element>();
        this.baseUri = baseUri;
    }

    /**
     * Parses the input using the error list returned by {@code ParseErrorList.noTracking()}.
     */
    Document parse(String input, String baseUri) {
        final ParseErrorList untracked = ParseErrorList.noTracking();
        return parse(input, baseUri, untracked);
    }

    /**
     * Initialises the builder, runs the parser loop, and returns the document that was built.
     */
    Document parse(String input, String baseUri, ParseErrorList errors) {
        initialiseParse(input, baseUri, errors);
        runParser();
        return doc;
    }

    /**
     * Reads tokens and processes each one; the EOF token is processed too, and ends the loop.
     */
    protected void runParser() {
        Token token;
        do {
            token = tokeniser.read();
            process(token);
        } while (token.type != Token.TokenType.EOF);
    }

    protected abstract boolean process(Token token);

    /**
     * @return the last element on the stack of open elements
     */
    protected Element currentElement() {
        return stack.getLast();
    }
}
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes the character references in a string by running a Tokeniser over it.
 * The tokeniser is given the error list returned by {@code ParseErrorList.noTracking()}.
 * @param string the text to unescape
 * @param inAttribute passed through to the tokeniser's unescapeEntities
 * @return the string produced by the tokeniser
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
// report the invalid named reference, quoting the offending name
characterReferenceError(String.format("invalid named reference '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader, replacing each character reference that can be consumed with its character.
 * An ampersand for which no character reference is consumed is kept as a literal '&'.
 * @param inAttribute passed through to consumeCharacterReference
 * @return the text read from the reader with the consumed character references replaced
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder unescaped = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text in front of the next ampersand verbatim
        unescaped.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' before trying to read a reference
        final Character decoded = consumeCharacterReference(null, inAttribute);
        if (decoded != null)
            unescaped.append(decoded);
        else
            unescaped.append('&'); // no reference consumed: keep the ampersand itself
    }
    return unescaped.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is present in the full named-entity map.
 * @param name the candidate entity name (e.g. "lt" or "amp")
 * @return true if {@code name} is a key of the full entity map
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the Character mapped to a named entity in the full entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the Character the map holds for that name (e.g. '<' or '&')
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes a string with the encoder and escape mode taken from the given output settings.
 * @param string text to escape
 * @param out output settings supplying the encoder and the escape mode
 * @return the result of the three-argument escape overload
 */
static String escape(String string, Document.OutputSettings out) {
    return escape(
            string,
            out.encoder(),
            out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> text up to the offset, and will have a new text node sibling containing the text after the offset.
* @param offset string offset point to split node at.
* @return the newly created text node containing the text after the offset.
*/
public TextNode splitText(int offset) {
    // Validate and slice against the same string, so the bounds check always matches substring().
    final String whole = getWholeText();
    Validate.isTrue(offset >= 0, "Split offset must not be negative");
    Validate.isTrue(offset < whole.length(), "Split offset must not be greater than current text length");

    final String head = whole.substring(0, offset);
    final String tail = whole.substring(offset);
    text(head); // this node keeps the part before the offset
    final TextNode tailNode = new TextNode(tail, this.baseUri());
    // only attach the tail as the next sibling when this node actually has a parent
    if (parent() != null)
        parent().addChildren(siblingIndex() + 1, tailNode);
    return tailNode;
}
/**
 * Appends the escaped text of this node to the accumulator. When pretty-printing, the text is
 * whitespace-normalised unless the parent element preserves whitespace, and an indent is written
 * first for a non-blank first child of an element whose tag formats as a block.
 */
void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
    String escaped = Entities.escape(getWholeText(), out);
    if (out.prettyPrint()) {
        if (parent() instanceof Element && !((Element) parent()).preserveWhitespace())
            escaped = normaliseWhitespace(escaped);
    }
    if (out.prettyPrint() && siblingIndex() == 0 && parentNode instanceof Element) {
        final Element owner = (Element) parentNode;
        if (owner.tag().formatAsBlock() && !isBlank())
            indent(accum, depth, out);
    }
    accum.append(escaped);
}
void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
    // intentionally empty: this node writes nothing to the accumulator in the tail step
}
public String toString() {
    // the string form of this node is its outer HTML
    final String html = outerHtml();
    return html;
}
/**
 * Create a new TextNode from HTML encoded (aka escaped) data.
 * @param encodedText Text containing encoded HTML (e.g. &lt;)
 * @param baseUri base URI handed to the new TextNode
 * @return TextNode containing unencoded data (e.g. <)
 */
public static TextNode createFromEncoded(String encodedText, String baseUri) {
    return new TextNode(Entities.unescape(encodedText), baseUri);
}
static String normaliseWhitespace(String text) {
    // thin delegate to the shared StringUtil implementation
    return StringUtil.normaliseWhitespace(text);
}
static String stripLeadingWhitespace(String text) {
    // same regex, written as the Pattern/Matcher call that String.replaceFirst performs
    return java.util.regex.Pattern.compile("^\\s+").matcher(text).replaceFirst("");
}
static boolean lastCharIsWhitespace(StringBuilder sb) {
return sb.length()
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes the character references in a string by running a Tokeniser over it.
 * The tokeniser is given the error list returned by {@code ParseErrorList.noTracking()}.
 * @param string the text to unescape
 * @param inAttribute passed through to the tokeniser's unescapeEntities
 * @return the string produced by the tokeniser
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
// report the invalid named reference, quoting the offending name
characterReferenceError(String.format("invalid named reference '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader, replacing each character reference that can be consumed with its character.
 * An ampersand for which no character reference is consumed is kept as a literal '&'.
 * @param inAttribute passed through to consumeCharacterReference
 * @return the text read from the reader with the consumed character references replaced
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder unescaped = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text in front of the next ampersand verbatim
        unescaped.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' before trying to read a reference
        final Character decoded = consumeCharacterReference(null, inAttribute);
        if (decoded != null)
            unescaped.append(decoded);
        else
            unescaped.append('&'); // no reference consumed: keep the ampersand itself
    }
    return unescaped.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is present in the full named-entity map.
 * @param name the candidate entity name (e.g. "lt" or "amp")
 * @return true if {@code name} is a key of the full entity map
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the Character mapped to a named entity in the full entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the Character the map holds for that name (e.g. '<' or '&')
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes a string with the encoder and escape mode taken from the given output settings.
 * @param string text to escape
 * @param out output settings supplying the encoder and the escape mode
 * @return the result of the three-argument escape overload
 */
static String escape(String string, Document.OutputSettings out) {
    return escape(
            string,
            out.encoder(),
            out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.nodes;
import org.jsoup.helper.Validate;
import java.util.Map;
/**
A single key + value attribute. Keys are trimmed and normalised to lower-case.
@author Jonathan Hedley, jonathan@hedley.net */
public class Attribute implements Map.Entry<String, String>, Cloneable {
private String key;
private String value;
/**
 * Create a new attribute from unencoded (raw) key and value.
 * The key is trimmed and lower-cased using locale-independent rules.
 * @param key attribute key; checked with Validate.notEmpty
 * @param value attribute value; must not be null
 * @see #createFromEncoded
 */
public Attribute(String key, String value) {
    Validate.notEmpty(key);
    Validate.notNull(value);
    // Locale.ROOT keeps the result independent of the default locale
    // (under a Turkish locale, plain toLowerCase() maps 'I' to dotless 'ı').
    this.key = key.trim().toLowerCase(java.util.Locale.ROOT);
    this.value = value;
}
/**
 Returns the key of this attribute.
 @return the attribute key
 */
public String getKey() {
    return this.key;
}
/**
 Set the attribute key. Gets normalised as per the constructor method: trimmed, then
 lower-cased using locale-independent rules.
 @param key the new key; checked with Validate.notEmpty
 */
public void setKey(String key) {
    Validate.notEmpty(key);
    // Locale.ROOT keeps the result independent of the default locale
    // (under a Turkish locale, plain toLowerCase() maps 'I' to dotless 'ı').
    this.key = key.trim().toLowerCase(java.util.Locale.ROOT);
}
/**
 Returns the value of this attribute.
 @return the attribute value
 */
public String getValue() {
    return this.value;
}
/**
 Replace the attribute value.
 @param value the new attribute value; must not be null
 @return the value held before this call
 */
public String setValue(String value) {
    Validate.notNull(value);
    final String previous = this.value;
    this.value = value;
    return previous;
}
/**
 Get the HTML representation of this attribute; e.g. {@code href="index.html"}.
 The value is escaped with the output settings of a freshly created empty-URI Document.
 @return HTML
 */
public String html() {
    final Document.OutputSettings defaults = new Document("").outputSettings();
    final String escaped = Entities.escape(value, defaults);
    return key + "=\"" + escaped + "\"";
}
/**
 Append this attribute to the accumulator as {@code key="value"}, escaping the value with the
 supplied output settings.
 */
protected void html(StringBuilder accum, Document.OutputSettings out) {
    accum.append(key);
    accum.append("=\"");
    accum.append(Entities.escape(value, out));
    accum.append("\"");
}
/**
 Get the string representation of this attribute, implemented as {@link #html()}.
 @return string
 */
public String toString() {
    final String rendered = html();
    return rendered;
}
/**
* Create
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes the character references in a string by running a Tokeniser over it.
 * The tokeniser is given the error list returned by {@code ParseErrorList.noTracking()}.
 * @param string the text to unescape
 * @param inAttribute passed through to the tokeniser's unescapeEntities
 * @return the string produced by the tokeniser
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
// report the invalid named reference, quoting the offending name
characterReferenceError(String.format("invalid named reference '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader, replacing each character reference that can be consumed with its character.
 * An ampersand for which no character reference is consumed is kept as a literal '&'.
 * @param inAttribute passed through to consumeCharacterReference
 * @return the text read from the reader with the consumed character references replaced
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder unescaped = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text in front of the next ampersand verbatim
        unescaped.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' before trying to read a reference
        final Character decoded = consumeCharacterReference(null, inAttribute);
        if (decoded != null)
            unescaped.append(decoded);
        else
            unescaped.append('&'); // no reference consumed: keep the ampersand itself
    }
    return unescaped.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is present in the full named-entity map.
 * @param name the candidate entity name (e.g. "lt" or "amp")
 * @return true if {@code name} is a key of the full entity map
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the Character mapped to a named entity in the full entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the Character the map holds for that name (e.g. '<' or '&')
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes a string with the encoder and escape mode taken from the given output settings.
 * @param string text to escape
 * @param out output settings supplying the encoder and the escape mode
 * @return the result of the three-argument escape overload
 */
static String escape(String string, Document.OutputSettings out) {
    return escape(
            string,
            out.encoder(),
            out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS>package org.jsoup.helper;
import java.util.Collection;
import java.util.Iterator;
/**
* A minimal String utility class. Designed for internal jsoup use only.
*/
public final class StringUtil {
// memoised padding up to 10: padding[i] holds exactly i spaces
private static final String[] padding = {
    "",
    " ",
    "  ",
    "   ",
    "    ",
    "     ",
    "      ",
    "       ",
    "        ",
    "         ",
    "          "
};
/**
 * Join a collection of strings by a separator
 * @param strings collection of string objects
 * @param sep string to place between strings
 * @return joined string
 */
public static String join(Collection strings, String sep) {
    final Iterator elements = strings.iterator();
    return join(elements, sep);
}
/**
 * Join the elements supplied by an iterator, placing a separator between consecutive elements
 * @param strings iterator of string objects
 * @param sep string to place between strings
 * @return joined string; empty when the iterator has no elements
 */
public static String join(Iterator strings, String sep) {
    if (!strings.hasNext()) {
        return "";
    }
    final String first = strings.next().toString();
    if (!strings.hasNext()) {
        // a lone element needs no builder
        return first;
    }
    final StringBuilder joined = new StringBuilder(64);
    joined.append(first);
    while (strings.hasNext()) {
        joined.append(sep).append(strings.next());
    }
    return joined.toString();
}
/**
 * Returns space padding
 * @param width amount of padding desired; must not be negative
 * @return string of spaces * width
 * @throws IllegalArgumentException if width is negative
 */
public static String padding(int width) {
    // zero is a valid width; only negative widths are rejected
    if (width < 0)
        throw new IllegalArgumentException("width must be >= 0");
    // small widths come from the memoised table
    if (width < padding.length)
        return padding[width];
    char[] out = new char[width];
    java.util.Arrays.fill(out, ' ');
    return String.valueOf(out);
}
/**
* Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
* @param string string to test
* @return if string is blank
*/
public static boolean isBlank(String string) {
if (string == null || string.length() == 0)
return true;
int l = string.length();
Jsoup, 28
<FILEB>
<CHANGES>
import org.jsoup.parser.Parser;
<CHANGEE>
<CHANGES>
public static boolean isBaseNamedEntity(String name) {
    // membership test against the base entity map only
    final boolean inBaseSet = base.containsKey(name);
    return inBaseSet;
}
<CHANGEE>
<CHANGES>
return Parser.unescapeEntities(string, strict);
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
/**
 * Unescapes the character references in a string by running a Tokeniser over it.
 * The tokeniser is given the error list returned by {@code ParseErrorList.noTracking()}.
 * @param string the text to unescape
 * @param inAttribute passed through to the tokeniser's unescapeEntities
 * @return the string produced by the tokeniser
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    final CharacterReader input = new CharacterReader(string);
    final ParseErrorList untracked = ParseErrorList.noTracking();
    return new Tokeniser(input, untracked).unescapeEntities(inAttribute);
}
<CHANGEE>
<FILEE>
<FILEB>
<CHANGES>
<CHANGEE>
<CHANGES>
boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
<CHANGEE>
<CHANGES>
// report the invalid named reference, quoting the offending name
characterReferenceError(String.format("invalid named reference '%s'", nameRef));
<CHANGEE>
<CHANGES>
/**
 * Drains the reader, replacing each character reference that can be consumed with its character.
 * An ampersand for which no character reference is consumed is kept as a literal '&'.
 * @param inAttribute passed through to consumeCharacterReference
 * @return the text read from the reader with the consumed character references replaced
 */
String unescapeEntities(boolean inAttribute) {
    final StringBuilder unescaped = new StringBuilder();
    while (!reader.isEmpty()) {
        // copy the run of text in front of the next ampersand verbatim
        unescaped.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;
        reader.consume(); // step over the '&' before trying to read a reference
        final Character decoded = consumeCharacterReference(null, inAttribute);
        if (decoded != null)
            unescaped.append(decoded);
        else
            unescaped.append('&'); // no reference consumed: keep the ampersand itself
    }
    return unescaped.toString();
}
<CHANGEE>
<FILEE>
<FILEB>
package org.jsoup.nodes;
<CHANGES>
<CHANGEE>
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
*/
public class Entities {
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private Entities() {}
/**
 * Tests whether a name is present in the full named-entity map.
 * @param name the candidate entity name (e.g. "lt" or "amp")
 * @return true if {@code name} is a key of the full entity map
 */
public static boolean isNamedEntity(String name) {
    final boolean known = full.containsKey(name);
    return known;
}
/**
* Check if the input is a known named entity in the base entity set.
* @param name the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity in the base set
* @see #isNamedEntity(String)
*/
<CHANGES>
<CHANGEE>
/**
 * Looks up the Character mapped to a named entity in the full entity map.
 * @param name entity name (e.g. "lt" or "amp")
 * @return the Character the map holds for that name (e.g. '<' or '&')
 */
public static Character getCharacterByName(String name) {
    final Character value = full.get(name);
    return value;
}
/**
 * Escapes a string with the encoder and escape mode taken from the given output settings.
 * @param string text to escape
 * @param out output settings supplying the encoder and the escape mode
 * @return the result of the three-argument escape overload
 */
static String escape(String string, Document.OutputSettings out) {
    return escape(
            string,
            out.encoder(),
            out.escapeMode());
}
static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
accum.append('&').append(map.get(c)).<SCANS> for (int i = 0; i < l; i++) {
if (!StringUtil.isWhitespace(string.codePointAt(i)))
return false;
}
return true;
}
/**
 * Tests if a string is numeric, i.e. every position tested with Character.isDigit is a digit
 * @param string string to test
 * @return true if only digit chars, false if empty or null or contains non-digit chars
 */
public static boolean isNumeric(String string) {
    final int length = (string == null) ? 0 : string.length();
    // null and empty strings are never numeric; otherwise stop at the first non-digit
    boolean numeric = length > 0;
    for (int i = 0; numeric && i < length; i++)
        numeric = Character.isDigit(string.codePointAt(i));
    return numeric;
}
/**
 * Tests if a code point is one of the five whitespace characters this utility recognises:
 * space, tab, line feed, form feed, or carriage return.
 * @param c code point to test
 * @return true if code point is whitespace, false otherwise
 */
public static boolean isWhitespace(int c){
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
            return true;
        default:
            return false;
    }
}
/**
 * Collapses every run of whitespace (as judged by isWhitespace) into a single space character.
 * @param string text to normalise
 * @return the normalised text, or the original string instance when nothing had to change
 */
public static String normaliseWhitespace(String string) {
    final int length = string.length();
    final StringBuilder out = new StringBuilder(length);
    boolean previousWasWhitespace = false;
    boolean changed = false;
    int pos = 0;
    while (pos < length) {
        final int codePoint = string.codePointAt(pos);
        pos += Character.charCount(codePoint); // advance by one or two chars, per code point
        if (!isWhitespace(codePoint)) {
            out.appendCodePoint(codePoint);
            previousWasWhitespace = false;
        } else if (previousWasWhitespace) {
            // a further whitespace char in the same run is dropped
            changed = true;
        } else {
            // first whitespace of a run is emitted as ' '; that is a change unless it already was ' '
            if (codePoint != ' ')
                changed = true;
            out.append(' ');
            previousWasWhitespace = true;
        }
    }
    return changed ? out.toString() : string;
}
/**
 * Tests whether any of the haystack strings equals the needle.
 * @param needle string to look for
 * @param haystack candidate strings; each one is compared with {@code equals(needle)}
 * @return true as soon as a candidate equals the needle, false if none does
 */
public static boolean in(String needle, String... haystack) {
    for (int i = 0; i < haystack.length; i++) {
        if (haystack[i].equals(needle))
            return true;
    }
    return false;
}
}